1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * simple hokey charset recoding configuration module
19 *
20 * See mod_ebcdic and mod_charset for more thought-out examples. This
21 * one is just so Jeff can learn how a module works and experiment with
22 * basic character set recoding configuration.
23 *
24 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
25 */
26
27 #include "httpd.h"
28 #include "http_config.h"
29
30 #include "http_core.h"
31 #include "http_log.h"
32 #include "http_main.h"
33 #include "http_protocol.h"
34 #include "http_request.h"
35 #include "util_charset.h"
36 #include "apr_buckets.h"
37 #include "util_filter.h"
38 #include "apr_strings.h"
39 #include "apr_lib.h"
40 #include "apr_xlate.h"
41 #define APR_WANT_STRFUNC
42 #include "apr_want.h"
43
/* Size in bytes of the on-stack buffer xlate_out_filter() translates into. */
#define OUTPUT_XLATE_BUF_SIZE (16*1024)

/* Size in bytes of the pool-allocated buffer reserved for input filtering. */
#define INPUT_XLATE_BUF_SIZE (8*1024)

/* Low-water mark for a translation buffer: once fewer than this many bytes
 * are free, the translated data is flushed instead of converting more.
 */
#define XLATE_MIN_BUFF_LEFT 128

/* Size of the buffer used to rebuild a character that was split across
 * buckets; a split character which does not fit is reported as an error.
 */
#define FATTEST_CHAR 8
54
/* Extended error status.  It is stored in the filter context next to the
 * apr_status_t so that log_xlate_error() can choose a specific message.
 */
typedef enum {
    /* no error info yet; must stay 0 so a zero-filled context starts here */
    EES_INIT = 0,
    /* built-in restriction encountered */
    EES_LIMIT = 1,
    /* incomplete multi-byte char at end of content */
    EES_INCOMPLETE_CHAR = 2,
    /* reading a bucket failed */
    EES_BUCKET_READ = 3,
    /* something bad happened in a filter below xlate */
    EES_DOWNSTREAM = 4,
    /* input data invalid */
    EES_BAD_INPUT = 5
} ees_t;
66
/* Names under which the translation filters are looked up and added:
 * XLATEOUT for response bodies, XLATEIN for request bodies.
 */
#define XLATEOUT_FILTER_NAME "XLATEOUT"
#define XLATEIN_FILTER_NAME "XLATEIN"
71
/* Per-directory configuration.  Every member is "unset" when zero (NULL,
 * IA_INIT, FX_INIT), which is what merge_charset_dir_conf() relies on to
 * decide whether a container overrides its parent.
 */
typedef struct charset_dir_t {
    /* source encoding (CharsetSourceEnc) */
    const char *charset_source;
    /* how to ship on the wire (CharsetDefault) */
    const char *charset_default;
    /* does the module call ap_add_*_filter() itself? */
    enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
    /* treat all mime types as text? */
    enum {FX_INIT, FX_FORCE, FX_NOFORCE} force_xlate;
} charset_dir_t;
80
81 /* charset_filter_ctx_t is created for each filter instance; because the same
82 * filter code is used for translating in both directions, we need this context
83 * data to tell the filter which translation handle to use; it also can hold a
84 * character which was split between buckets
85 */
86 typedef struct charset_filter_ctx_t {
87 apr_xlate_t *xlate;
88 int is_sb; /* single-byte translation? */
89 charset_dir_t *dc;
90 ees_t ees; /* extended error status */
91 apr_size_t saved;
92 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
93 int ran; /* has filter instance run before? */
94 int noop; /* should we pass brigades through unchanged? */
95 char *tmp; /* buffer for input filtering */
96 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
97 apr_bucket_brigade *tmpbb; /* used for passing downstream */
98 } charset_filter_ctx_t;
99
100 /* charset_req_t is available via r->request_config if any translation is
101 * being performed
102 */
103 typedef struct charset_req_t {
104 charset_dir_t *dc;
105 charset_filter_ctx_t *output_ctx, *input_ctx;
106 } charset_req_t;
107
108 module AP_MODULE_DECLARE_DATA charset_lite_module;
109
/* Per-directory config constructor: returns a zero-filled charset_dir_t
 * allocated from p, i.e. one with every setting still unset.  dummy is not
 * used.
 */
static void *create_charset_dir_conf(apr_pool_t *p, char *dummy)
{
    return apr_pcalloc(p, sizeof(charset_dir_t));
}
116
/* Per-directory config merger.  Builds a new charset_dir_t from p in which
 * each setting comes from overridesv when it was set there and from basev
 * (the enclosing container) otherwise.
 */
static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
{
    const charset_dir_t *parent = basev;
    const charset_dir_t *child = overridesv;
    charset_dir_t *merged = apr_pcalloc(p, sizeof(*merged));

    if (child->charset_default) {
        merged->charset_default = child->charset_default;
    }
    else {
        merged->charset_default = parent->charset_default;
    }

    if (child->charset_source) {
        merged->charset_source = child->charset_source;
    }
    else {
        merged->charset_source = parent->charset_source;
    }

    if (child->implicit_add != IA_INIT) {
        merged->implicit_add = child->implicit_add;
    }
    else {
        merged->implicit_add = parent->implicit_add;
    }

    if (child->force_xlate != FX_INIT) {
        merged->force_xlate = child->force_xlate;
    }
    else {
        merged->force_xlate = parent->force_xlate;
    }

    return merged;
}
137
138 /* CharsetSourceEnc charset
139 */
add_charset_source(cmd_parms * cmd,void * in_dc,const char * name)140 static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
141 const char *name)
142 {
143 charset_dir_t *dc = in_dc;
144
145 dc->charset_source = name;
146 return NULL;
147 }
148
149 /* CharsetDefault charset
150 */
add_charset_default(cmd_parms * cmd,void * in_dc,const char * name)151 static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
152 const char *name)
153 {
154 charset_dir_t *dc = in_dc;
155
156 dc->charset_default = name;
157 return NULL;
158 }
159
160 /* CharsetOptions optionflag...
161 */
add_charset_options(cmd_parms * cmd,void * in_dc,const char * flag)162 static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
163 const char *flag)
164 {
165 charset_dir_t *dc = in_dc;
166
167 if (!strcasecmp(flag, "ImplicitAdd")) {
168 dc->implicit_add = IA_IMPADD;
169 }
170 else if (!strcasecmp(flag, "NoImplicitAdd")) {
171 dc->implicit_add = IA_NOIMPADD;
172 }
173 else if (!strcasecmp(flag, "TranslateAllMimeTypes")) {
174 dc->force_xlate = FX_FORCE;
175 }
176 else if (!strcasecmp(flag, "NoTranslateAllMimeTypes")) {
177 dc->force_xlate = FX_NOFORCE;
178 }
179 else {
180 return apr_pstrcat(cmd->temp_pool,
181 "Invalid CharsetOptions option: ",
182 flag,
183 NULL);
184 }
185
186 return NULL;
187 }
188
189 /* find_code_page() is a fixup hook that checks if the module is
190 * configured and the input or output potentially need to be translated.
191 * If so, context is initialized for the filters.
192 */
find_code_page(request_rec * r)193 static int find_code_page(request_rec *r)
194 {
195 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
196 &charset_lite_module);
197 charset_req_t *reqinfo;
198 charset_filter_ctx_t *input_ctx, *output_ctx;
199 apr_status_t rv;
200
201 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
202 "uri: %s file: %s method: %d "
203 "imt: %s flags: %s%s%s %s->%s",
204 r->uri,
205 r->filename ? r->filename : "(none)",
206 r->method_number,
207 r->content_type ? r->content_type : "(unknown)",
208 r->main ? "S" : "", /* S if subrequest */
209 r->prev ? "R" : "", /* R if redirect */
210 r->proxyreq ? "P" : "", /* P if proxy */
211 dc->charset_source, dc->charset_default);
212
213 /* If we don't have a full directory configuration, bail out.
214 */
215 if (!dc->charset_source || !dc->charset_default) {
216 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01448)
217 "incomplete configuration: src %s, dst %s",
218 dc->charset_source ? dc->charset_source : "unspecified",
219 dc->charset_default ? dc->charset_default : "unspecified");
220 return DECLINED;
221 }
222
223 /* catch proxy requests */
224 if (r->proxyreq) {
225 return DECLINED;
226 }
227
228 /* mod_rewrite indicators */
229 if (r->filename
230 && (!strncmp(r->filename, "redirect:", 9)
231 || !strncmp(r->filename, "gone:", 5)
232 || !strncmp(r->filename, "passthrough:", 12)
233 || !strncmp(r->filename, "forbidden:", 10))) {
234 return DECLINED;
235 }
236
237 /* no translation when server and network charsets are set to the same value */
238 if (!strcasecmp(dc->charset_source, dc->charset_default)) {
239 return DECLINED;
240 }
241
242 /* Get storage for the request data and the output filter context.
243 * We rarely need the input filter context, so allocate that separately.
244 */
245 reqinfo = (charset_req_t *)apr_pcalloc(r->pool,
246 sizeof(charset_req_t) +
247 sizeof(charset_filter_ctx_t));
248 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);
249
250 reqinfo->dc = dc;
251 output_ctx->dc = dc;
252 output_ctx->tmpbb = apr_brigade_create(r->pool,
253 r->connection->bucket_alloc);
254 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
255
256 reqinfo->output_ctx = output_ctx;
257
258 switch (r->method_number) {
259 case M_PUT:
260 case M_POST:
261 /* Set up input translation. Note: A request body can be included
262 * with the OPTIONS method, but for now we don't set up translation
263 * of it.
264 */
265 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
266 input_ctx->bb = apr_brigade_create(r->pool,
267 r->connection->bucket_alloc);
268 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
269 input_ctx->dc = dc;
270 reqinfo->input_ctx = input_ctx;
271 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
272 dc->charset_default, r->pool);
273 if (rv != APR_SUCCESS) {
274 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01449)
275 "can't open translation %s->%s",
276 dc->charset_default, dc->charset_source);
277 return HTTP_INTERNAL_SERVER_ERROR;
278 }
279 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) {
280 input_ctx->is_sb = 0;
281 }
282 }
283
284 return DECLINED;
285 }
286
/* Returns 1 if a filter registered as filter_name (compared without regard
 * to case) occurs anywhere in filter_list, 0 otherwise.  r is not used.
 */
static int configured_in_list(request_rec *r, const char *filter_name,
                              struct ap_filter_t *filter_list)
{
    struct ap_filter_t *node;

    for (node = filter_list; node != NULL; node = node->next) {
        if (strcasecmp(filter_name, node->frec->name) == 0) {
            return 1;
        }
    }
    return 0;
}
300
/* Is filter_name already present in the request's input filter chain? */
static int configured_on_input(request_rec *r, const char *filter_name)
{
    struct ap_filter_t *chain = r->input_filters;

    return configured_in_list(r, filter_name, chain);
}
305
/* Is filter_name already present in the request's output filter chain? */
static int configured_on_output(request_rec *r, const char *filter_name)
{
    struct ap_filter_t *chain = r->output_filters;

    return configured_in_list(r, filter_name, chain);
}
310
311 /* xlate_insert_filter() is a filter hook which decides whether or not
312 * to insert a translation filter for the current request.
313 */
xlate_insert_filter(request_rec * r)314 static void xlate_insert_filter(request_rec *r)
315 {
316 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
317 charset_req_t *reqinfo = ap_get_module_config(r->request_config,
318 &charset_lite_module);
319 charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
320 &charset_lite_module);
321
322 if (dc && (dc->implicit_add == IA_NOIMPADD)) {
323 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, r,
324 "xlate output filter not added implicitly because "
325 "CharsetOptions included 'NoImplicitAdd'");
326 return;
327 }
328
329 if (reqinfo) {
330 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
331 ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
332 r->connection);
333 }
334 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
335 "xlate output filter not added implicitly because %s",
336 !reqinfo->output_ctx ?
337 "no output configuration available" :
338 "another module added the filter");
339
340 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
341 ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
342 r->connection);
343 }
344 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
345 "xlate input filter not added implicitly because %s",
346 !reqinfo->input_ctx ?
347 "no input configuration available" :
348 "another module added the filter");
349 }
350 }
351
352 /* stuff that sucks that I know of:
353 *
354 * bucket handling:
355 * why create an eos bucket when we see it come down the stream? just send the one
356 * passed as input... news flash: this will be fixed when xlate_out_filter() starts
357 * using the more generic xlate_brigade()
358 *
359 * translation mechanics:
360 * we don't handle characters that straddle more than two buckets; an error
361 * will be generated
362 */
363
send_bucket_downstream(ap_filter_t * f,apr_bucket * b)364 static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b)
365 {
366 charset_filter_ctx_t *ctx = f->ctx;
367 apr_status_t rv;
368
369 APR_BRIGADE_INSERT_TAIL(ctx->tmpbb, b);
370 rv = ap_pass_brigade(f->next, ctx->tmpbb);
371 if (rv != APR_SUCCESS) {
372 ctx->ees = EES_DOWNSTREAM;
373 }
374 apr_brigade_cleanup(ctx->tmpbb);
375 return rv;
376 }
377
378 /* send_downstream() is passed the translated data; it puts it in a single-
379 * bucket brigade and passes the brigade to the next filter
380 */
send_downstream(ap_filter_t * f,const char * tmp,apr_size_t len)381 static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
382 {
383 request_rec *r = f->r;
384 conn_rec *c = r->connection;
385 apr_bucket *b;
386
387 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc);
388 return send_bucket_downstream(f, b);
389 }
390
send_eos(ap_filter_t * f)391 static apr_status_t send_eos(ap_filter_t *f)
392 {
393 request_rec *r = f->r;
394 conn_rec *c = r->connection;
395 apr_bucket_brigade *bb;
396 apr_bucket *b;
397 charset_filter_ctx_t *ctx = f->ctx;
398 apr_status_t rv;
399
400 bb = apr_brigade_create(r->pool, c->bucket_alloc);
401 b = apr_bucket_eos_create(c->bucket_alloc);
402 APR_BRIGADE_INSERT_TAIL(bb, b);
403 rv = ap_pass_brigade(f->next, bb);
404 if (rv != APR_SUCCESS) {
405 ctx->ees = EES_DOWNSTREAM;
406 }
407 return rv;
408 }
409
set_aside_partial_char(charset_filter_ctx_t * ctx,const char * partial,apr_size_t partial_len)410 static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
411 const char *partial,
412 apr_size_t partial_len)
413 {
414 apr_status_t rv;
415
416 if (sizeof(ctx->buf) > partial_len) {
417 ctx->saved = partial_len;
418 memcpy(ctx->buf, partial, partial_len);
419 rv = APR_SUCCESS;
420 }
421 else {
422 rv = APR_INCOMPLETE;
423 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle
424 * buckets
425 */
426 }
427 return rv;
428 }
429
finish_partial_char(charset_filter_ctx_t * ctx,const char ** cur_str,apr_size_t * cur_len,char ** out_str,apr_size_t * out_len)430 static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
431 /* input buffer: */
432 const char **cur_str,
433 apr_size_t *cur_len,
434 /* output buffer: */
435 char **out_str,
436 apr_size_t *out_len)
437 {
438 apr_status_t rv;
439 apr_size_t tmp_input_len;
440
441 /* Keep adding bytes from the input string to the saved string until we
442 * 1) finish the input char
443 * 2) get an error
444 * or 3) run out of bytes to add
445 */
446
447 do {
448 ctx->buf[ctx->saved] = **cur_str;
449 ++ctx->saved;
450 ++*cur_str;
451 --*cur_len;
452 tmp_input_len = ctx->saved;
453 rv = apr_xlate_conv_buffer(ctx->xlate,
454 ctx->buf,
455 &tmp_input_len,
456 *out_str,
457 out_len);
458 } while (rv == APR_INCOMPLETE && *cur_len);
459
460 if (rv == APR_SUCCESS) {
461 ctx->saved = 0;
462 }
463 else {
464 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
465 * straddling more than two buckets
466 */
467 }
468
469 return rv;
470 }
471
log_xlate_error(ap_filter_t * f,apr_status_t rv)472 static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
473 {
474 charset_filter_ctx_t *ctx = f->ctx;
475 const char *msg;
476 char msgbuf[100];
477 apr_size_t len;
478
479 switch(ctx->ees) {
480 case EES_LIMIT:
481 rv = 0;
482 msg = APLOGNO(02193) "xlate filter - a built-in restriction was encountered";
483 break;
484 case EES_BAD_INPUT:
485 rv = 0;
486 msg = APLOGNO(02194) "xlate filter - an input character was invalid";
487 break;
488 case EES_BUCKET_READ:
489 rv = 0;
490 msg = APLOGNO(02195) "xlate filter - bucket read routine failed";
491 break;
492 case EES_INCOMPLETE_CHAR:
493 rv = 0;
494 strcpy(msgbuf, APLOGNO(02196) "xlate filter - incomplete char at end of input - ");
495 len = ctx->saved;
496
497 /* We must ensure not to process more than what would fit in the
498 * remaining of the destination buffer, including terminating NULL */
499 if (len > (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2)
500 len = (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2;
501
502 ap_bin2hex(ctx->buf, len, msgbuf + strlen(msgbuf));
503 msg = msgbuf;
504 break;
505 case EES_DOWNSTREAM:
506 msg = APLOGNO(02197) "xlate filter - an error occurred in a lower filter";
507 break;
508 default:
509 msg = APLOGNO(02198) "xlate filter - returning error";
510 }
511 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(02997) "%s", msg);
512 }
513
514 /* chk_filter_chain() is called once per filter instance; it tries to
515 * determine if the current filter instance should be disabled because
516 * its translation is incompatible with the translation of an existing
517 * instance of the translate filter
518 *
519 * Example bad scenario:
520 *
521 * configured filter chain for the request:
522 * INCLUDES XLATEOUT(8859-1->UTS-16)
523 * configured filter chain for the subrequest:
524 * XLATEOUT(8859-1->UTS-16)
525 *
526 * When the subrequest is processed, the filter chain will be
527 * XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16)
528 * This makes no sense, so the instance of XLATEOUT added for the
529 * subrequest will be noop-ed.
530 *
531 * Example good scenario:
532 *
533 * configured filter chain for the request:
534 * INCLUDES XLATEOUT(8859-1->UTS-16)
535 * configured filter chain for the subrequest:
536 * XLATEOUT(IBM-1047->8859-1)
537 *
538 * When the subrequest is processed, the filter chain will be
539 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16)
540 * This makes sense, so the instance of XLATEOUT added for the
541 * subrequest will be left alone and it will translate from
542 * IBM-1047->8859-1.
543 */
chk_filter_chain(ap_filter_t * f)544 static void chk_filter_chain(ap_filter_t *f)
545 {
546 ap_filter_t *curf;
547 charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL,
548 *ctx = f->ctx;
549 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
550
551 if (ctx->noop) {
552 return;
553 }
554
555 /* walk the filter chain; see if it makes sense for our filter to
556 * do any translation
557 */
558 curf = output ? f->r->output_filters : f->r->input_filters;
559 while (curf) {
560 if (!strcasecmp(curf->frec->name, f->frec->name) &&
561 curf->ctx) {
562 curctx = (charset_filter_ctx_t *)curf->ctx;
563 if (!last_xlate_ctx) {
564 last_xlate_ctx = curctx;
565 }
566 else {
567 if (strcmp(last_xlate_ctx->dc->charset_default,
568 curctx->dc->charset_source)) {
569 /* incompatible translation
570 * if our filter instance is incompatible with an instance
571 * already in place, noop our instance
572 * Notes:
573 * . We are only willing to noop our own instance.
574 * . It is possible to noop another instance which has not
575 * yet run, but this is not currently implemented.
576 * Hopefully it will not be needed.
577 * . It is not possible to noop an instance which has
578 * already run.
579 */
580 if (last_xlate_ctx == f->ctx) {
581 last_xlate_ctx->noop = 1;
582 if (APLOGrtrace1(f->r)) {
583 const char *symbol = output ? "->" : "<-";
584
585 ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
586 0, f->r, APLOGNO(01451)
587 "%s %s - disabling "
588 "translation %s%s%s; existing "
589 "translation %s%s%s",
590 f->r->uri ? "uri" : "file",
591 f->r->uri ? f->r->uri : f->r->filename,
592 last_xlate_ctx->dc->charset_source,
593 symbol,
594 last_xlate_ctx->dc->charset_default,
595 curctx->dc->charset_source,
596 symbol,
597 curctx->dc->charset_default);
598 }
599 }
600 else {
601 const char *symbol = output ? "->" : "<-";
602
603 ap_log_rerror(APLOG_MARK, APLOG_ERR,
604 0, f->r, APLOGNO(01452)
605 "chk_filter_chain() - can't disable "
606 "translation %s%s%s; existing "
607 "translation %s%s%s",
608 last_xlate_ctx->dc->charset_source,
609 symbol,
610 last_xlate_ctx->dc->charset_default,
611 curctx->dc->charset_source,
612 symbol,
613 curctx->dc->charset_default);
614 }
615 break;
616 }
617 }
618 }
619 curf = curf->next;
620 }
621 }
622
623 /* xlate_brigade() is used to filter request and response bodies
624 *
625 * we'll stop when one of the following occurs:
626 * . we run out of buckets
627 * . we run out of space in the output buffer
628 * . we hit an error or metadata
629 *
630 * inputs:
631 * bb: brigade to process
632 * buffer: storage to hold the translated characters
633 * buffer_avail: size of buffer
634 * (and a few more uninteresting parms)
635 *
636 * outputs:
637 * return value: APR_SUCCESS or some error code
638 * bb: we've removed any buckets representing the
639 * translated characters; the eos bucket, if
640 * present, will be left in the brigade
641 * buffer: filled in with translated characters
642 * buffer_avail: updated with the bytes remaining
643 * hit_eos: did we hit an EOS bucket?
644 */
xlate_brigade(charset_filter_ctx_t * ctx,apr_bucket_brigade * bb,char * buffer,apr_size_t * buffer_avail,int * hit_eos)645 static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
646 apr_bucket_brigade *bb,
647 char *buffer,
648 apr_size_t *buffer_avail,
649 int *hit_eos)
650 {
651 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
652 apr_bucket *consumed_bucket;
653 const char *bucket;
654 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
655 apr_size_t bucket_avail; /* bytes left in current bucket */
656 apr_status_t rv = APR_SUCCESS;
657
658 *hit_eos = 0;
659 bucket_avail = 0;
660 consumed_bucket = NULL;
661 while (1) {
662 if (!bucket_avail) { /* no bytes left to process in the current bucket... */
663 if (consumed_bucket) {
664 apr_bucket_delete(consumed_bucket);
665 consumed_bucket = NULL;
666 }
667 b = APR_BRIGADE_FIRST(bb);
668 if (b == APR_BRIGADE_SENTINEL(bb) ||
669 APR_BUCKET_IS_METADATA(b)) {
670 break;
671 }
672 rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
673 if (rv != APR_SUCCESS) {
674 ctx->ees = EES_BUCKET_READ;
675 break;
676 }
677 bucket_avail = bytes_in_bucket;
678 consumed_bucket = b; /* for axing when we're done reading it */
679 }
680 if (bucket_avail) {
681 /* We've got data, so translate it. */
682 if (ctx->saved) {
683 /* Rats... we need to finish a partial character from the previous
684 * bucket.
685 *
686 * Strangely, finish_partial_char() increments the input buffer
687 * pointer but does not increment the output buffer pointer.
688 */
689 apr_size_t old_buffer_avail = *buffer_avail;
690 rv = finish_partial_char(ctx,
691 &bucket, &bucket_avail,
692 &buffer, buffer_avail);
693 buffer += old_buffer_avail - *buffer_avail;
694 }
695 else {
696 apr_size_t old_buffer_avail = *buffer_avail;
697 apr_size_t old_bucket_avail = bucket_avail;
698 rv = apr_xlate_conv_buffer(ctx->xlate,
699 bucket, &bucket_avail,
700 buffer,
701 buffer_avail);
702 buffer += old_buffer_avail - *buffer_avail;
703 bucket += old_bucket_avail - bucket_avail;
704
705 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
706 /* We need to save the final byte(s) for next time; we can't
707 * convert it until we look at the next bucket.
708 */
709 rv = set_aside_partial_char(ctx, bucket, bucket_avail);
710 bucket_avail = 0;
711 }
712 }
713 if (rv != APR_SUCCESS) {
714 /* bad input byte or partial char too big to store */
715 break;
716 }
717 if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
718 /* if any data remains in the current bucket, split there */
719 if (bucket_avail) {
720 apr_bucket_split(b, bytes_in_bucket - bucket_avail);
721 }
722 apr_bucket_delete(b);
723 break;
724 }
725 }
726 }
727
728 if (!APR_BRIGADE_EMPTY(bb)) {
729 b = APR_BRIGADE_FIRST(bb);
730 if (APR_BUCKET_IS_EOS(b)) {
731 /* Leave the eos bucket in the brigade for reporting to
732 * subsequent filters.
733 */
734 *hit_eos = 1;
735 if (ctx->saved) {
736 /* Oops... we have a partial char from the previous bucket
737 * that won't be completed because there's no more data.
738 */
739 rv = APR_INCOMPLETE;
740 ctx->ees = EES_INCOMPLETE_CHAR;
741 }
742 }
743 }
744
745 return rv;
746 }
747
748 /* xlate_out_filter() handles (almost) arbitrary conversions from one charset
749 * to another...
750 * translation is determined in the fixup hook (find_code_page), which is
751 * where the filter's context data is set up... the context data gives us
752 * the translation handle
753 */
xlate_out_filter(ap_filter_t * f,apr_bucket_brigade * bb)754 static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
755 {
756 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
757 &charset_lite_module);
758 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
759 &charset_lite_module);
760 charset_filter_ctx_t *ctx = f->ctx;
761 apr_bucket *dptr, *consumed_bucket;
762 const char *cur_str;
763 apr_size_t cur_len, cur_avail;
764 char tmp[OUTPUT_XLATE_BUF_SIZE];
765 apr_size_t space_avail;
766 int done;
767 apr_status_t rv = APR_SUCCESS;
768
769 if (!ctx) {
770 /* this is SetOutputFilter path; grab the preallocated context,
771 * if any; note that if we decided not to do anything in an earlier
772 * handler, we won't even have a reqinfo
773 */
774 if (reqinfo) {
775 ctx = f->ctx = reqinfo->output_ctx;
776 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
777 * in the filter chain; we can't have two
778 * instances using the same context
779 */
780 }
781 if (!ctx) { /* no idea how to translate; don't do anything */
782 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
783 ctx->dc = dc;
784 ctx->noop = 1;
785 }
786 }
787
788 /* Check the mime type to see if translation should be performed.
789 */
790 if (!ctx->noop && ctx->xlate == NULL) {
791 const char *mime_type = f->r->content_type;
792
793 if (mime_type && (ap_cstr_casecmpn(mime_type, "text/", 5) == 0 ||
794 #if APR_CHARSET_EBCDIC
795 /* On an EBCDIC machine, be willing to translate mod_autoindex-
796 * generated output. Otherwise, it doesn't look too cool.
797 *
798 * XXX This isn't a perfect fix because this doesn't trigger us
799 * to convert from the charset of the source code to ASCII. The
800 * general solution seems to be to allow a generator to set an
801 * indicator in the r specifying that the body is coded in the
802 * implementation character set (i.e., the charset of the source
803 * code). This would get several different types of documents
804 * translated properly: mod_autoindex output, mod_status output,
805 * mod_info output, hard-coded error documents, etc.
806 */
807 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
808 #endif
809 ap_cstr_casecmpn(mime_type, "message/", 8) == 0 ||
810 dc->force_xlate == FX_FORCE)) {
811
812 rv = apr_xlate_open(&ctx->xlate,
813 dc->charset_default, dc->charset_source, f->r->pool);
814 if (rv != APR_SUCCESS) {
815 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01453)
816 "can't open translation %s->%s",
817 dc->charset_source, dc->charset_default);
818 ctx->noop = 1;
819 }
820 else {
821 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
822 ctx->is_sb = 0;
823 }
824 }
825 }
826 else {
827 ctx->noop = 1;
828 if (mime_type) {
829 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
830 "mime type is %s; no translation selected",
831 mime_type);
832 }
833 }
834 }
835
836 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
837 "xlate_out_filter() - "
838 "charset_source: %s charset_default: %s",
839 dc && dc->charset_source ? dc->charset_source : "(none)",
840 dc && dc->charset_default ? dc->charset_default : "(none)");
841
842 if (!ctx->ran) { /* filter never ran before */
843 chk_filter_chain(f);
844 ctx->ran = 1;
845 if (!ctx->noop && !ctx->is_sb) {
846 /* We're not converting between two single-byte charsets, so unset
847 * Content-Length since it is unlikely to remain the same.
848 */
849 apr_table_unset(f->r->headers_out, "Content-Length");
850 }
851 }
852
853 if (ctx->noop) {
854 return ap_pass_brigade(f->next, bb);
855 }
856
857 dptr = APR_BRIGADE_FIRST(bb);
858 done = 0;
859 cur_len = 0;
860 space_avail = sizeof(tmp);
861 consumed_bucket = NULL;
862 while (!done) {
863 if (!cur_len) { /* no bytes left to process in the current bucket... */
864 if (consumed_bucket) {
865 apr_bucket_delete(consumed_bucket);
866 consumed_bucket = NULL;
867 }
868 if (dptr == APR_BRIGADE_SENTINEL(bb)) {
869 break;
870 }
871 if (APR_BUCKET_IS_EOS(dptr)) {
872 cur_len = -1; /* XXX yuck, but that tells us to send
873 * eos down; when we minimize our bb construction
874 * we'll fix this crap */
875 if (ctx->saved) {
876 /* Oops... we have a partial char from the previous bucket
877 * that won't be completed because there's no more data.
878 */
879 rv = APR_INCOMPLETE;
880 ctx->ees = EES_INCOMPLETE_CHAR;
881 }
882 break;
883 }
884 if (APR_BUCKET_IS_METADATA(dptr)) {
885 apr_bucket *metadata_bucket;
886 metadata_bucket = dptr;
887 dptr = APR_BUCKET_NEXT(dptr);
888 APR_BUCKET_REMOVE(metadata_bucket);
889 rv = send_bucket_downstream(f, metadata_bucket);
890 if (rv != APR_SUCCESS) {
891 done = 1;
892 }
893 continue;
894 }
895 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
896 if (rv != APR_SUCCESS) {
897 ctx->ees = EES_BUCKET_READ;
898 break;
899 }
900 consumed_bucket = dptr; /* for axing when we're done reading it */
901 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
902 * next bucket */
903 }
904 /* Try to fill up our tmp buffer with translated data. */
905 cur_avail = cur_len;
906
907 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
908 if (ctx->saved) {
909 /* Rats... we need to finish a partial character from the previous
910 * bucket.
911 */
912 char *tmp_tmp;
913
914 tmp_tmp = tmp + sizeof(tmp) - space_avail;
915 rv = finish_partial_char(ctx,
916 &cur_str, &cur_len,
917 &tmp_tmp, &space_avail);
918 }
919 else {
920 rv = apr_xlate_conv_buffer(ctx->xlate,
921 cur_str, &cur_avail,
922 tmp + sizeof(tmp) - space_avail, &space_avail);
923
924 /* Update input ptr and len after consuming some bytes */
925 cur_str += cur_len - cur_avail;
926 cur_len = cur_avail;
927
928 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
929 /* We need to save the final byte(s) for next time; we can't
930 * convert it until we look at the next bucket.
931 */
932 rv = set_aside_partial_char(ctx, cur_str, cur_len);
933 cur_len = 0;
934 }
935 }
936 }
937
938 if (rv != APR_SUCCESS) {
939 /* bad input byte or partial char too big to store */
940 done = 1;
941 }
942
943 if (space_avail < XLATE_MIN_BUFF_LEFT) {
944 /* It is time to flush, as there is not enough space left in the
945 * current output buffer to bother with converting more data.
946 */
947 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
948 if (rv != APR_SUCCESS) {
949 done = 1;
950 }
951
952 /* tmp is now empty */
953 space_avail = sizeof(tmp);
954 }
955 }
956
957 if (rv == APR_SUCCESS) {
958 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
959 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
960 }
961 }
962 if (rv == APR_SUCCESS) {
963 if (cur_len == -1) {
964 rv = send_eos(f);
965 }
966 }
967 else {
968 log_xlate_error(f, rv);
969 }
970
971 return rv;
972 }
973
xlate_in_filter(ap_filter_t * f,apr_bucket_brigade * bb,ap_input_mode_t mode,apr_read_type_e block,apr_off_t readbytes)974 static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
975 ap_input_mode_t mode, apr_read_type_e block,
976 apr_off_t readbytes)
977 {
978 apr_status_t rv;
979 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
980 &charset_lite_module);
981 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
982 &charset_lite_module);
983 charset_filter_ctx_t *ctx = f->ctx;
984 apr_size_t buffer_size;
985 int hit_eos;
986
987 /* just get out of the way of things we don't want. */
988 if (mode != AP_MODE_READBYTES) {
989 return ap_get_brigade(f->next, bb, mode, block, readbytes);
990 }
991
992 if (!ctx) {
993 /* this is SetInputFilter path; grab the preallocated context,
994 * if any; note that if we decided not to do anything in an earlier
995 * handler, we won't even have a reqinfo
996 */
997 if (reqinfo) {
998 ctx = f->ctx = reqinfo->input_ctx;
999 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
1000 * in the filter chain; we can't have two
1001 * instances using the same context
1002 */
1003 }
1004 if (!ctx) { /* no idea how to translate; don't do anything */
1005 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
1006 ctx->dc = dc;
1007 ctx->noop = 1;
1008 }
1009 }
1010
1011 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
1012 "xlate_in_filter() - "
1013 "charset_source: %s charset_default: %s",
1014 dc && dc->charset_source ? dc->charset_source : "(none)",
1015 dc && dc->charset_default ? dc->charset_default : "(none)");
1016
1017 if (!ctx->ran) { /* filter never ran before */
1018 chk_filter_chain(f);
1019 ctx->ran = 1;
1020 if (!ctx->noop && !ctx->is_sb
1021 && apr_table_get(f->r->headers_in, "Content-Length")) {
1022 /* A Content-Length header is present, but it won't be valid after
1023 * conversion because we're not converting between two single-byte
1024 * charsets. This will affect most CGI scripts and may affect
1025 * some modules.
1026 * Content-Length can't be unset here because that would break
1027 * being able to read the request body.
1028 * Processing of chunked request bodies is not impacted by this
1029 * filter since the length was not declared anyway.
1030 */
1031 ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r,
1032 "Request body length may change, resulting in "
1033 "misprocessing by some modules or scripts");
1034 }
1035 }
1036
1037 if (ctx->noop) {
1038 return ap_get_brigade(f->next, bb, mode, block, readbytes);
1039 }
1040
1041 if (APR_BRIGADE_EMPTY(ctx->bb)) {
1042 if ((rv = ap_get_brigade(f->next, bb, mode, block,
1043 readbytes)) != APR_SUCCESS) {
1044 return rv;
1045 }
1046 }
1047 else {
1048 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
1049 }
1050
1051 buffer_size = INPUT_XLATE_BUF_SIZE;
1052 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
1053 if (rv == APR_SUCCESS) {
1054 if (!hit_eos) {
1055 /* move anything leftover into our context for next time;
1056 * we don't currently "set aside" since the data came from
1057 * down below, but I suspect that for long-term we need to
1058 * do that
1059 */
1060 APR_BRIGADE_CONCAT(ctx->bb, bb);
1061 }
1062 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
1063 apr_bucket *e;
1064
1065 e = apr_bucket_heap_create(ctx->tmp,
1066 INPUT_XLATE_BUF_SIZE - buffer_size,
1067 NULL, f->r->connection->bucket_alloc);
1068 /* make sure we insert at the head, because there may be
1069 * an eos bucket already there, and the eos bucket should
1070 * come after the data
1071 */
1072 APR_BRIGADE_INSERT_HEAD(bb, e);
1073 }
1074 else {
1075 /* XXX need to get some more data... what if the last brigade
1076 * we got had only the first byte of a multibyte char? we need
1077 * to grab more data from the network instead of returning an
1078 * empty brigade
1079 */
1080 }
1081 /* If we have any metadata at the head of ctx->bb, go ahead and move it
1082 * onto the end of bb to be returned to our caller.
1083 */
1084 if (!APR_BRIGADE_EMPTY(ctx->bb)) {
1085 apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb);
1086 while (b != APR_BRIGADE_SENTINEL(ctx->bb)
1087 && APR_BUCKET_IS_METADATA(b)) {
1088 APR_BUCKET_REMOVE(b);
1089 APR_BRIGADE_INSERT_TAIL(bb, b);
1090 b = APR_BRIGADE_FIRST(ctx->bb);
1091 }
1092 }
1093 }
1094 else {
1095 log_xlate_error(f, rv);
1096 }
1097
1098 return rv;
1099 }
1100
1101 static const command_rec cmds[] =
1102 {
1103 AP_INIT_TAKE1("CharsetSourceEnc",
1104 add_charset_source,
1105 NULL,
1106 OR_FILEINFO,
1107 "source (html,cgi,ssi) file charset"),
1108 AP_INIT_TAKE1("CharsetDefault",
1109 add_charset_default,
1110 NULL,
1111 OR_FILEINFO,
1112 "name of default charset"),
1113 AP_INIT_ITERATE("CharsetOptions",
1114 add_charset_options,
1115 NULL,
1116 OR_FILEINFO,
1117 "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, "
1118 "NoTranslateAllMimeTypes"),
1119 {NULL}
1120 };
1121
/* Registers this module's filters and request hooks with the server.
 *
 * p - pool supplied by the caller; not used here.
 */
static void charset_register_hooks(apr_pool_t *p)
{
    /* Both translation filters are registered as resource-level filters. */
    ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
                              AP_FTYPE_RESOURCE);
    ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
                             AP_FTYPE_RESOURCE);

    /* find_code_page runs in the fixups phase; xlate_insert_filter is
     * hooked into insert_filter with REALLY_LAST ordering.
     */
    ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_insert_filter(xlate_insert_filter, NULL, NULL,
                          APR_HOOK_REALLY_LAST);
}
1131
/* Module record for charset_lite.  The initializer is positional; the slot
 * descriptions below follow the layout of httpd's module structure as
 * declared in http_config.h.
 */
AP_DECLARE_MODULE(charset_lite) =
{
    STANDARD20_MODULE_STUFF,
    create_charset_dir_conf,    /* per-directory config creator */
    merge_charset_dir_conf,     /* per-directory config merger */
    NULL,                       /* per-server config creator: none */
    NULL,                       /* per-server config merger: none */
    cmds,                       /* configuration directive table */
    charset_register_hooks      /* hook and filter registration */
};
1142
1143