/* -*- mode: c -*-
 *
 * $Id: mod_encoding.c,v 1.6 2001/12/11 12:55:38 tai Exp $
 *
 */
/*
 * mod_encoding core module test implementation for Apache2.
 *  by Kunio Miyamoto (wakatono@todo.gr.jp)
 * Original security fix port 2002/06/06
 *  by Kunio Miyamoto (wakatono@todo.gr.jp)
 */

#include <httpd.h>
#include <http_config.h>
#include <http_core.h>
#include <http_log.h>
#include <http_protocol.h>
#include <http_request.h>
/* #include <ap_compat.h> */

#include <iconv.h>

/**
 * Core part of the module.
 * Here, the module hooks into the post-read-request stage,
 * and converts all non-ascii-or-utf8 expressions into the
 * appropriate encoding specified by the configuration.
 *
 * Note UTF-8 is the implicit default and will be tried first,
 * regardless of user configuration.
 */

/*
 * Debug switch: MOD_ENCODING_DEBUG may be predefined by the build; otherwise
 * it follows the generic DEBUG macro (1 when DEBUG is defined, 0 when not).
 */
#ifndef MOD_ENCODING_DEBUG
#ifdef DEBUG
#define MOD_ENCODING_DEBUG 1
#else
#define MOD_ENCODING_DEBUG 0
#endif
#endif

/*
 * Evaluate `expr` only when debugging is enabled.
 *
 * Wrapped in do { } while (0) so that `DBG(...);` is exactly one statement:
 * a bare `if` would break (or silently capture the `else` of) code such as
 * `if (cond) DBG(x); else y;`.
 */
#define DBG(expr) do { if (MOD_ENCODING_DEBUG) { expr; } } while (0)

/* FIXME: remove gcc-dependency */

/*
 * Tri-state values stored in encoding_config.enable_function.
 * UNSET is the value a freshly created configuration starts with and
 * makes config_merge fall back to the parent's setting.
 */
enum {
  ENABLE_FLAG_UNSET = 0,
  ENABLE_FLAG_OFF   = 1,
  ENABLE_FLAG_ON    = 2
};

/**
 * Module-local configuration record.
 *
 * One instance is created per server and per directory context and the
 * instances are combined by config_merge().
 */
typedef struct {
  /* One of ENABLE_FLAG_UNSET / ENABLE_FLAG_OFF / ENABLE_FLAG_ON. */
  int           enable_function;
  /* Target encoding handed to iconv_open(); NULL means "not configured"
   * (the request handler then falls back to "UTF-8"). */
  char         *server_encoding;
  /* Flat array of pointers holding pairs: a compiled User-Agent regex
   * followed by an array of encoding names expected from that agent. */
  apr_array_header_t *client_encoding;
  /* Array of fallback encoding names tried after the agent-specific
   * ones; NULL when no DefaultClientEncoding directive was given. */
  apr_array_header_t *default_encoding;
} encoding_config;

/* Forward declaration: the directive handlers and the request hook below
 * pass &encoding_module to ap_get_module_config() before the structure is
 * defined at the end of this file. */
module AP_MODULE_DECLARE_DATA encoding_module;

/***************************************************************************
 * utility methods
 ***************************************************************************/

/**
 * Converts encoding of the input string.
 *
 * The result is allocated from r->pool and is always NUL-terminated.
 * When srclen is 0 the input pointer itself is returned unchanged.
 * The conversion descriptor is reset to its initial state before
 * returning, both on success and on failure.
 *
 * @param r      Apache request object; its pool provides the output buffer
 * @param cd     Conversion descriptor, made by iconv_open(3).
 * @param srcbuf Input string
 * @param srclen Length of the input string. Usually strlen(srcbuf).
 * @return Converted string, or NULL if the input cannot be converted
 *         (invalid/incomplete sequence, output does not fit, or the
 *         length is too large to size the output buffer).
 */
static char *
iconv_string(request_rec *r, iconv_t cd, char *srcbuf, size_t srclen) {

  char   *outbuf, *marker;
  size_t  outlen;

  if (srclen == 0) {
    return srcbuf;
  }

  /* Refuse lengths whose 4x expansion plus terminator would wrap size_t. */
  if (srclen > ((size_t)(-1) - 1) / 4) {
    return NULL;
  }

  /*
   * Allocate space for conversion. Note max bloat factor is 4 of UCS-4.
   * One extra byte is allocated for the terminator but NOT advertised to
   * iconv(), so the final '\0' store can never land outside the buffer.
   */
  outlen = srclen * 4;
  marker = outbuf = apr_palloc(r->pool, outlen + 1);

  if (outbuf == NULL) {
    return NULL;
  }

  /* Convert every character within input string. */
  while (srclen > 0) {
    if (iconv(cd, &srcbuf, &srclen, &outbuf, &outlen) == (size_t)(-1)) {
      /* leave the descriptor in its initial state for the caller */
      iconv(cd, NULL, NULL, NULL, NULL);
      return NULL;
    }
  }

  /* Emit any pending shift sequence; a failure here means truncated output */
  if (iconv(cd, NULL, NULL, &outbuf, &outlen) == (size_t)(-1)) {
    iconv(cd, NULL, NULL, NULL, NULL);
    return NULL;
  }

  /* Everything done. Reset state and return result */
  iconv(cd, NULL, NULL, NULL, NULL);

  *outbuf = '\0';

  return marker;
}

/**
 * Normalize charset in HTTP request line and HTTP header(s).
 * Returns 0 on success, -1 (non-zero) on error.
 *
 * The request URI and every header named in `keys` are URL-unescaped and
 * converted on private copies first. The request is only modified
 * (ap_parse_uri / apr_table_set) once *all* conversions have succeeded,
 * so a failed attempt leaves the request untouched and the caller can
 * retry with another conversion descriptor without unescaping or
 * converting the same data twice.
 *
 * NOTE(review): the return value of ap_unescape_url() is ignored, as it
 * always has been in this module — confirm whether malformed escapes
 * should abort the conversion.
 *
 * @param  r Apache request object structure
 * @param cd Conversion descriptor, made by iconv_open(3).
 */
static int
iconv_header(request_rec *r, iconv_t cd) {

  static const char *const keys[] = { "Destination", NULL };
  char *converted[sizeof(keys) / sizeof(keys[0])];
  char *uri;
  char *buff;
  int   i;

  if (r->unparsed_uri == NULL)
    return -1;

  /* Normalize encoding in HTTP request line (on a copy) */
  buff = apr_pstrdup(r->pool, r->unparsed_uri);
  ap_unescape_url(buff);
  if ((uri = iconv_string(r, cd, buff, strlen(buff))) == NULL)
    return -1;

  /* Normalize encoding in HTTP request header(s) (on copies) */
  for (i = 0 ; keys[i] ; i++) {
    const char *value = apr_table_get(r->headers_in, keys[i]);

    converted[i] = NULL;
    if (value != NULL) {
      buff = apr_pstrdup(r->pool, value);
      ap_unescape_url(buff);
      if ((converted[i] = iconv_string(r, cd, buff, strlen(buff))) == NULL)
	return -1;
    }
  }

  /* Everything converted: commit the results to the request */
  ap_parse_uri(r, uri);
  for (i = 0 ; keys[i] ; i++) {
    if (converted[i] != NULL)
      apr_table_set(r->headers_in, keys[i], converted[i]);
  }

  return 0;
}

/**
 * Return the list of encoding(s) (defaults to (list "UTF-8"))
 * which named client is expected to send.
 *
 * The returned array is allocated from r->pool and always starts with
 * "UTF-8". If `lookup` matches one of the patterns in `encmap`, the
 * encodings registered for the first matching pattern are appended.
 *
 * `encmap` is read as consecutive (compiled regex, encoding array) pairs.
 * Pairs with a NULL member and a trailing unpaired element are skipped
 * instead of being dereferenced.
 *
 * @param r      Apache request object structure
 * @param encmap Table of UA-to-encoding(s)
 * @param lookup Name of the useragent to look for (may be NULL)
 */
static apr_array_header_t *
get_client_encoding(request_rec *r,
		    apr_array_header_t *encmap, const char *lookup) {
  apr_array_header_t  *encs = apr_array_make(r->pool, 1, sizeof(char *));
  void               **list;
  int i;

  /* push UTF-8 as the first candidate of expected encoding */
  *((char **)apr_array_push(encs)) = apr_pstrdup(r->pool, "UTF-8");

  if (! lookup || ! encmap)
    return encs;

  list = (void **)encmap->elts;

  /* i + 1 must be a valid index too: entries are consumed in pairs */
  for (i = 0 ; i + 1 < encmap->nelts ; i += 2) {
    if (list[i] == NULL || list[i + 1] == NULL)
      continue;

    if (ap_regexec((regex_t *)list[i], lookup, 0, NULL, 0) == 0) {
      apr_array_cat(encs, (apr_array_header_t *)list[i + 1]);
      return encs;
    }
  }

  return encs;
}

/**
 * Handler for "EncodingEngine" directive.
 *
 * Stores ENABLE_FLAG_ON or ENABLE_FLAG_OFF in the active configuration.
 * When the directive is not inside a path context (cmd->path is unset),
 * the per-server configuration is used instead of the one passed in.
 */
static const char *
set_encoding_engine(cmd_parms *cmd, encoding_config *conf, int flag) {
  encoding_config *target = conf;

  if (cmd->path == NULL) {
    target = ap_get_module_config(cmd->server->module_config,
				  &encoding_module);
  }

  if (flag) {
    target->enable_function = ENABLE_FLAG_ON;
  } else {
    target->enable_function = ENABLE_FLAG_OFF;
  }

  return NULL;
}

/**
 * Handler for "SetServerEncoding" directive.
 *
 * Records a pool-allocated copy of the given encoding name. When the
 * directive is not inside a path context (cmd->path is unset), the
 * per-server configuration is used instead of the one passed in.
 */
static const char *
set_server_encoding(cmd_parms *cmd, encoding_config *conf, char *arg) {
  encoding_config *target = conf;

  if (cmd->path == NULL) {
    target = ap_get_module_config(cmd->server->module_config,
				  &encoding_module);
  }

  target->server_encoding = apr_pstrdup(cmd->pool, arg);

  return NULL;
}

/**
 * Handler for "AddClientEncoding" directive.
 *
 * This registers regex pattern of UserAgent: header and expected
 * encoding(s) from that useragent.
 *
 * The first word of `args` is compiled as a case-insensitive extended
 * regex; the remaining words are encoding names. The compiled pattern
 * and the encoding list are appended to conf->client_encoding together,
 * as one pair, only after both are known to be valid, so the array never
 * ends up holding a NULL pattern or an unpaired entry.
 *
 * @return NULL on success, otherwise an error message for the
 *         configuration parser (missing pattern or invalid regex).
 */
static const char *
add_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) {
  apr_array_header_t    *encs;
  void                  *pattern;
  char                  *arg;


  if (! cmd->path) {
    conf = ap_get_module_config(cmd->server->module_config, &encoding_module);
  }

  /* the UserAgent: pattern is mandatory */
  if (! *args || ! (arg = ap_getword_conf_nc(cmd->pool, &args))) {
    return "Usage: AddClientEncoding <agent-name> <enclist>";
  }

  pattern = ap_pregcomp(cmd->pool, arg, REG_EXTENDED|REG_ICASE|REG_NOSUB);
  if (pattern == NULL) {
    return apr_pstrcat(cmd->pool,
		       "AddClientEncoding: cannot compile agent-name pattern: ",
		       arg, NULL);
  }

  /* collect list of possible encodings from above useragent */
  encs = apr_array_make(cmd->pool, 1, sizeof(void *));
  while (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) {
    *(void **)apr_array_push(encs) = apr_pstrdup(cmd->pool, arg);
  }

  /* register pattern and encoding list as one pair */
  *(void **)apr_array_push(conf->client_encoding) = pattern;
  *(void **)apr_array_push(conf->client_encoding) = encs;

  return NULL;
}

/**
 * Handler for "DefaultClientEncoding" directive.
 *
 * Replaces the configuration's fallback list with a new array holding
 * every word found in `args`. When the directive is not inside a path
 * context (cmd->path is unset), the per-server configuration is used.
 */
static const char *
default_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) {
  apr_array_header_t *fallbacks;
  char               *word;

  if (cmd->path == NULL) {
    conf = ap_get_module_config(cmd->server->module_config, &encoding_module);
  }

  fallbacks = apr_array_make(cmd->pool, 1, sizeof(char *));
  conf->default_encoding = fallbacks;

  /* consume the argument string one word at a time */
  for (;;) {
    if (*args == '\0')
      break;

    word = ap_getword_conf_nc(cmd->pool, &args);
    if (word == NULL)
      break;

    *(void **)apr_array_push(fallbacks) = apr_pstrdup(cmd->pool, word);
  }

  return NULL;
}

/***************************************************************************
 * module-unique command table
 *
 * FIXME: decide which scope to apply for following directives
 ***************************************************************************/

/*
 * Configuration directives understood by this module. Each entry gives,
 * in order: directive name, handler function, a NULL data slot, the
 * OR_ALL override flag, the argument style (FLAG, TAKE1 or RAW_ARGS) and
 * a usage string. The RAW_ARGS handlers split their argument string into
 * words themselves. The list is terminated by a {NULL} entry.
 *
 * NOTE(review): entries are written as positional initializers rather
 * than with the AP_INIT_* helper macros from http_config.h — confirm
 * this matches the command_rec layout of the targeted Apache version.
 */
static const command_rec mod_enc_commands[] = {
  {"EncodingEngine",
   set_encoding_engine, NULL,
   OR_ALL, FLAG,  "Usage: EncodingEngine (on|off)"},

  {"SetServerEncoding",
   set_server_encoding, NULL,
   OR_ALL, TAKE1, "Usage: SetServerEncoding <encname>"},

  {"AddClientEncoding",
   add_client_encoding, NULL,
   OR_ALL, RAW_ARGS, "Usage: AddClientEncoding <agent-name> <enclist>"},

  {"DefaultClientEncoding",
   default_client_encoding, NULL,
   OR_ALL, RAW_ARGS, "Usage: DefaultClientEncoding <enclist>"},

  {NULL}
};

/***************************************************************************
 * module methods
 ***************************************************************************/

/**
 * Create a server-level configuration record.
 *
 * The record is allocated from `p` with the engine flag unset, no server
 * or default encodings, and an empty client-encoding array.
 * The `s` argument is not used.
 */
static void *
server_setup(apr_pool_t *p, server_rec *s) {
  encoding_config *cfg;

  DBG(fprintf(stderr, "server_setup: entered\n"));

  cfg = apr_pcalloc(p, sizeof(*cfg));

  cfg->client_encoding  = apr_array_make(p, 2, sizeof(void *));
  cfg->default_encoding = NULL;
  cfg->server_encoding  = NULL;
  cfg->enable_function  = ENABLE_FLAG_UNSET;

  return cfg;
}

/**
 * Create a directory-level configuration record.
 *
 * Delegates to server_setup(), so a directory record starts out exactly
 * like a server record. The `dir` argument is not used.
 */
static void *
folder_setup(apr_pool_t *p, char *dir) {
  void *cfg;

  DBG(fprintf(stderr, "folder_setup: entered\n"));

  cfg = server_setup(p, NULL);

  return cfg;
}

/**
 * Merge configuration.
 *
 * Builds a new record in `p` from a parent (`base`) and a child
 * (`override`) configuration:
 *  - enable_function, server_encoding and default_encoding come from the
 *    child when the child has set them, otherwise from the parent;
 *  - client_encoding is the child's pairs followed by the parent's, so
 *    child patterns are matched first.
 *
 * @param p        Pool to allocate the merged record from
 * @param base     Parent encoding_config
 * @param override Child encoding_config
 * @return The merged encoding_config
 */
static void *
config_merge(apr_pool_t *p, void *base, void *override) {
  encoding_config *parent = base;
  encoding_config *child  = override;
  encoding_config *merge;

  DBG(fprintf(stderr, "config_merge: entered\n"));

  merge = apr_pcalloc(p, sizeof(*merge));

  if (child->enable_function != ENABLE_FLAG_UNSET)
    merge->enable_function =  child->enable_function;
  else
    merge->enable_function = parent->enable_function;

  DBG(fprintf(stderr,
	      "merged: enable_function == %d\n", merge->enable_function));

  if (child->server_encoding)
    merge->server_encoding =  child->server_encoding;
  else
    merge->server_encoding = parent->server_encoding;

  /* server_encoding may legitimately be NULL; never hand NULL to %s */
  DBG(fprintf(stderr,
	      "merged: server_encoding == %s\n",
	      merge->server_encoding ? merge->server_encoding : "(null)"));

  if (child->default_encoding)
    merge->default_encoding =  child->default_encoding;
  else
    merge->default_encoding = parent->default_encoding;

  merge->client_encoding =
    apr_array_append(p, child->client_encoding, parent->client_encoding);

  return merge;
}

/**
 * Request hook performing the encoding conversion.
 *
 * Merges the server and per-directory settings, and does nothing unless
 * the engine is switched on. Otherwise it builds the list of candidate
 * client encodings (UTF-8, then those registered for the User-Agent,
 * then the configured defaults) and tries them in order until one
 * converts the request via iconv_header(). Candidates that iconv_open()
 * rejects are skipped.
 *
 * Always returns DECLINED, whether or not a conversion succeeded.
 */
static int
mod_enc_convert(request_rec *r) {
  encoding_config    *server_cfg;
  encoding_config    *dir_cfg;
  encoding_config    *cfg;
  const char         *target;      /* server-side encoding */
  apr_array_header_t *candidates;  /* possible client encodings */
  char              **names;       /* element view of the above */
  int                 idx;
  int                 converted;

  server_cfg = ap_get_module_config(r->server->module_config,
				    &encoding_module);
  dir_cfg    = ap_get_module_config(r->per_dir_config, &encoding_module);
  cfg        = config_merge(r->pool, server_cfg, dir_cfg);

  if (cfg->enable_function != ENABLE_FLAG_ON)
    return DECLINED;

  target = cfg->server_encoding;
  if (target == NULL)
    target = "UTF-8";

  candidates = get_client_encoding(r, cfg->client_encoding,
				   apr_table_get(r->headers_in, "User-Agent"));

  if (cfg->default_encoding != NULL)
    apr_array_cat(candidates, cfg->default_encoding);

  names = (char **)candidates->elts;

  /* walk the candidates; stop at the first one that converts cleanly */
  for (idx = 0 ; idx < candidates->nelts ; idx++) {
    iconv_t cd = iconv_open(target, names[idx]);

    if (cd == (iconv_t)(-1))
      continue;

    converted = (iconv_header(r, cd) == 0);
    iconv_close(cd);

    if (converted)
      break;
  }

  return DECLINED;
}

/**
 * Register this module's hooks.
 *
 * mod_enc_convert is installed as a post-read-request hook with
 * APR_HOOK_FIRST ordering. (An earlier variant hooked translate_name
 * instead.)
 */
static void
register_hooks(apr_pool_t *p) {
  ap_hook_post_read_request(mod_enc_convert, NULL, NULL, APR_HOOK_FIRST);
}

/***************************************************************************
 * exported module structure
 ***************************************************************************/

/* Module record: wires the configuration callbacks, the directive table
 * and the hook registration function together. The same merge function
 * is used for per-directory and per-server configurations. */
module AP_MODULE_DECLARE_DATA encoding_module = {
  STANDARD20_MODULE_STUFF,
  folder_setup,     /* create per-directory config structure */
  config_merge,     /* merge per-directory config structures */
  server_setup,     /* create per-server config structure */
  config_merge,     /* merge per-server config structures */
  mod_enc_commands, /* command handlers */
  register_hooks    /* hook registration */
};


