1 /* Copyright (c) 2007-11, WebThing Ltd
2 * Copyright (c) 2011-, The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 #if defined(WIN32)
21 #define XML2ENC_DECLARE_EXPORT
22 #endif
23
24 #include <ctype.h>
25
26 /* libxml2 includes unicode/[...].h files which uses C++ comments */
27 #if defined(__clang__)
28 #pragma clang diagnostic push
29 #pragma clang diagnostic warning "-Wcomment"
30 #elif defined(__GNUC__)
31 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
32 #pragma GCC diagnostic push
33 #pragma GCC diagnostic warning "-Wcomment"
34 #endif
35 #endif
36
37 /* libxml2 */
38 #include <libxml/encoding.h>
39
40 #if defined(__clang__)
41 #pragma clang diagnostic pop
42 #elif defined(__GNUC__)
43 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
44 #pragma GCC diagnostic pop
45 #endif
46 #endif
47
48 #include "http_protocol.h"
49 #include "http_config.h"
50 #include "http_log.h"
51 #include "apr_strings.h"
52 #include "apr_xlate.h"
53
54 #include "apr_optional.h"
55 #include "mod_xml2enc.h"
56
57 module AP_MODULE_DECLARE_DATA xml2enc_module;
58
59 #define BUFLEN 8192
60 #define BUF_MIN 4096
61 #define APR_BRIGADE_DO(b,bb) for (b = APR_BRIGADE_FIRST(bb); \
62 b != APR_BRIGADE_SENTINEL(bb); \
63 b = APR_BUCKET_NEXT(b))
64
65 #define ENC_INITIALISED 0x100
66 #define ENC_SEEN_EOS 0x200
67 #define ENC_SKIPTO ENCIO_SKIPTO
68
69 #define HAVE_ENCODING(enc) \
70 (((enc)!=XML_CHAR_ENCODING_NONE)&&((enc)!=XML_CHAR_ENCODING_ERROR))
71
72 /*
73 * XXX: Check all those ap_assert()s and replace those that should not happen
74 * XXX: with AP_DEBUG_ASSERT and those that may happen with proper error
75 * XXX: handling.
76 */
77 typedef struct {
78 xmlCharEncoding xml2enc;
79 char* buf;
80 apr_size_t bytes;
81 apr_xlate_t* convset;
82 unsigned int flags;
83 apr_off_t bblen;
84 apr_bucket_brigade* bbnext;
85 apr_bucket_brigade* bbsave;
86 const char* encoding;
87 } xml2ctx;
88
89 typedef struct {
90 const char* default_charset;
91 xmlCharEncoding default_encoding;
92 apr_array_header_t* skipto;
93 } xml2cfg;
94
/* One element name from xml2StartParse (element type of xml2cfg.skipto). */
typedef struct {
    const char *val;
} tattr;
98
99 static ap_regex_t* seek_meta_ctype;
100 static ap_regex_t* seek_charset;
101
xml2enc_filter(request_rec * r,const char * enc,unsigned int mode)102 static apr_status_t xml2enc_filter(request_rec* r, const char* enc,
103 unsigned int mode)
104 {
105 /* set up a ready-initialised ctx to convert to enc, and insert filter */
106 apr_xlate_t* convset;
107 apr_status_t rv;
108 unsigned int flags = (mode ^ ENCIO);
109 if ((mode & ENCIO) == ENCIO_OUTPUT) {
110 rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool);
111 flags |= ENC_INITIALISED;
112 }
113 else if ((mode & ENCIO) == ENCIO_INPUT) {
114 rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool);
115 flags |= ENC_INITIALISED;
116 }
117 else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) {
118 convset = NULL;
119 rv = APR_SUCCESS; /* we'll initialise later by sniffing */
120 }
121 else {
122 rv = APR_EGENERAL;
123 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01426)
124 "xml2enc: bad mode %x", mode);
125 }
126 if (rv == APR_SUCCESS) {
127 xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx));
128 ctx->flags = flags;
129 if (flags & ENC_INITIALISED) {
130 ctx->convset = convset;
131 ctx->bblen = BUFLEN;
132 ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen);
133 }
134 ap_add_output_filter("xml2enc", ctx, r, r->connection);
135 }
136 else {
137 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01427)
138 "xml2enc: Charset %s not supported.", enc) ;
139 }
140 return rv;
141 }
142
143 /* This needs to operate only when we're using htmlParser */
144 /* Different modules may apply different rules here. Ho, hum. */
fix_skipto(request_rec * r,xml2ctx * ctx)145 static void fix_skipto(request_rec* r, xml2ctx* ctx)
146 {
147 apr_status_t rv;
148 xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
149 if ((cfg->skipto != NULL) && (ctx->flags & ENC_SKIPTO)) {
150 int found = 0;
151 char* p = ap_strchr(ctx->buf, '<');
152 tattr* starts = (tattr*) cfg->skipto->elts;
153 while (!found && p && *p) {
154 int i;
155 for (i = 0; i < cfg->skipto->nelts; ++i) {
156 if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
157 /* found a starting element. Strip all that comes before. */
158 apr_bucket* b;
159 apr_bucket* bstart;
160 rv = apr_brigade_partition(ctx->bbsave, (p-ctx->buf),
161 &bstart);
162 ap_assert(rv == APR_SUCCESS);
163 while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) {
164 apr_bucket_delete(b);
165 }
166 ctx->bytes -= (p-ctx->buf);
167 ctx->buf = p ;
168 found = 1;
169 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01428)
170 "Skipped to first <%s> element",
171 starts[i].val) ;
172 break;
173 }
174 }
175 p = ap_strchr(p+1, '<');
176 }
177 if (p == NULL) {
178 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01429)
179 "Failed to find start of recognised HTML!");
180 }
181 }
182 }
/*
 * Work out the charset of the data flattened into ctx->buf and record it
 * in ctx->encoding / ctx->xml2enc (and ctx->convset when only apr_xlate
 * can handle it).  Sources are tried in this order:
 *   1. the charset parameter of r->content_type,
 *   2. xmlDetectCharEncoding() on the start of the data,
 *   3. a <meta http-equiv=content-type> element, which is also cut out of
 *      ctx->bbsave and truncates ctx->buf at the place where it started,
 *   4. the configured default, or ISO-8859-1.
 * Finally the charset in r->content_type is set to utf-8.
 */
static void sniff_encoding(request_rec* r, xml2ctx* ctx)
{
    xml2cfg* conf = NULL; /* initialise to shut compiler warnings up */
    char* s;
    apr_bucket* meta_first;
    apr_bucket* meta_end;
    apr_bucket* following;
    ap_regmatch_t m[2];
    apr_status_t rc;
    const char* ctype = r->content_type;

    /* 1: a charset in the HTTP headers wins */
    if (ctype != NULL) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01430)
                      "Content-Type is %s", ctype) ;

        s = ap_strcasestr(ctype, "charset=");
        if (s != NULL) {
            s += 8;     /* step over "charset=" */
            ctx->encoding = apr_pstrndup(r->pool, s, strcspn(s, " ;"));
            if (ctx->encoding != NULL) {
                ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01431)
                              "Got charset %s from HTTP headers",
                              ctx->encoding) ;
                ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
            }
        }
    }

    /* 2: let libxml2 inspect the first bytes (BOM and the like) */
    if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) {
        ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf,
                                             ctx->bytes);
        if (HAVE_ENCODING(ctx->xml2enc)) {
            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01432)
                          "Got charset from XML rules.") ;
            ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc);
        }
    }

    /* 3: a content-type <meta>.  It is removed in every case, because the
     * charset it declares is about to become untrue.
     */
    if (ap_regexec(seek_meta_ctype, ctx->buf, 1, m, 0) == 0) {
        /* bucket boundaries at the end and at the start of the match */
        rc = apr_brigade_partition(ctx->bbsave, m[0].rm_eo, &meta_end);
        ap_assert(rc == APR_SUCCESS);
        rc = apr_brigade_partition(ctx->bbsave, m[0].rm_so, &meta_first);
        ap_assert(rc == APR_SUCCESS);

        /* what remains useful in buf ends where the <meta> began */
        ctx->bytes = m[0].rm_so;

        if (ctx->encoding == NULL) {
            /* private copy of the tag; m[] is reused for the next search */
            s = apr_pstrndup(r->pool, ctx->buf + m[0].rm_so,
                             m[0].rm_eo - m[0].rm_so) ;
            if (ap_regexec(seek_charset, s, 2, m, 0) == 0) {
                ctx->encoding = apr_pstrndup(r->pool, s + m[1].rm_so,
                                             m[1].rm_eo - m[1].rm_so);
                if (ctx->encoding != NULL) {
                    ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
                    if (HAVE_ENCODING(ctx->xml2enc)) {
                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
                                      APLOGNO(01433)
                                      "Got charset %s from HTML META",
                                      ctx->encoding) ;
                    }
                }
            }
        }

        /* drop the buckets that make up the <meta> */
        for (; meta_first != meta_end; meta_first = following) {
            following = APR_BUCKET_NEXT(meta_first);
            apr_bucket_delete(meta_first);
        }
        /* keep buf a string */
        ctx->buf[ctx->bytes] = 0;
    }

    /* 4: libxml2 does not know the charset (or none was found at all):
     * settle on a name and see whether apr_xlate can convert it to UTF-8.
     */
    if (!HAVE_ENCODING(ctx->xml2enc)) {
        conf = ap_get_module_config(r->per_dir_config, &xml2enc_module);
        if (ctx->encoding == NULL) {
            if (conf->default_charset) {
                ctx->encoding = conf->default_charset;
            }
            else {
                ctx->encoding = "ISO-8859-1";
            }
        }
        /* Unsupported charset. Can we get (iconv) support through apr_xlate? */
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01434)
                      "Charset %s not supported by libxml2; trying apr_xlate",
                      ctx->encoding);
        if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool)
            != APR_SUCCESS) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01435)
                          "Charset %s not supported. Consider aliasing it?",
                          ctx->encoding) ;
        }
        else {
            /* this filter converts, so what follows it sees UTF-8 */
            ctx->xml2enc = XML_CHAR_ENCODING_UTF8 ;
        }
    }

    if (!HAVE_ENCODING(ctx->xml2enc)) {
        /* Use configuration default as a last resort */
        ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01436)
                  "No usable charset information; using configuration default");
        if (conf->default_encoding == XML_CHAR_ENCODING_NONE) {
            ctx->xml2enc = XML_CHAR_ENCODING_8859_1;
        }
        else {
            ctx->xml2enc = conf->default_encoding;
        }
    }

    /* advertise utf-8 in the response Content-Type */
    if (ctype && ctx->encoding) {
        if (ap_regexec(seek_charset, ctype, 2, m, 0) != 0) {
            /* no charset parameter yet: add one */
            r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8",
                                          NULL);
        }
        else {
            /* swap the value of the existing charset parameter for utf-8 */
            char* fixed = apr_palloc(r->pool, strlen(r->content_type) + 13
                                     - (m[0].rm_eo - m[0].rm_so) + 1);
            memcpy(fixed, r->content_type, m[1].rm_so);
            memcpy(fixed + m[1].rm_so, "utf-8", 5);
            strcpy(fixed + m[1].rm_so + 5, r->content_type + m[1].rm_eo);
            r->content_type = fixed;
        }
    }
}
303
xml2enc_filter_init(ap_filter_t * f)304 static apr_status_t xml2enc_filter_init(ap_filter_t* f)
305 {
306 xml2ctx* ctx;
307 if (!f->ctx) {
308 xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config,
309 &xml2enc_module);
310 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx));
311 ctx->xml2enc = XML_CHAR_ENCODING_NONE;
312 if (cfg->skipto != NULL) {
313 ctx->flags |= ENC_SKIPTO;
314 }
315 }
316 return APR_SUCCESS;
317 }
xml2enc_ffunc(ap_filter_t * f,apr_bucket_brigade * bb)318 static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb)
319 {
320 xml2ctx* ctx = f->ctx;
321 apr_status_t rv;
322 apr_bucket* b;
323 apr_bucket* bstart;
324 apr_size_t insz = 0;
325 int pending_meta = 0;
326 char *ctype;
327 char *p;
328
329 if (!ctx || !f->r->content_type) {
330 /* log error about configuring this */
331 ap_remove_output_filter(f);
332 return ap_pass_brigade(f->next, bb) ;
333 }
334
335 ctype = apr_pstrdup(f->r->pool, f->r->content_type);
336 for (p = ctype; *p; ++p)
337 if (isupper(*p))
338 *p = tolower(*p);
339
340 /* only act if starts-with "text/" or contains "xml" */
341 if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) {
342 ap_remove_output_filter(f);
343 return ap_pass_brigade(f->next, bb) ;
344 }
345
346 if (ctx->bbsave == NULL) {
347 ctx->bbsave = apr_brigade_create(f->r->pool,
348 f->r->connection->bucket_alloc);
349 }
350 /* append to any data left over from last time */
351 APR_BRIGADE_CONCAT(ctx->bbsave, bb);
352
353 if (!(ctx->flags & ENC_INITIALISED)) {
354 /* some kind of initialisation required */
355 /* Turn all this off when post-processing */
356
357 /* if we don't have enough data to sniff but more's to come, wait */
358 apr_brigade_length(ctx->bbsave, 0, &ctx->bblen);
359 if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) {
360 APR_BRIGADE_DO(b, ctx->bbsave) {
361 if (APR_BUCKET_IS_EOS(b)) {
362 ctx->flags |= ENC_SEEN_EOS;
363 break;
364 }
365 }
366 if (!(ctx->flags & ENC_SEEN_EOS)) {
367 /* not enough data to sniff. Wait for more */
368 APR_BRIGADE_DO(b, ctx->bbsave) {
369 rv = apr_bucket_setaside(b, f->r->pool);
370 ap_assert(rv == APR_SUCCESS);
371 }
372 return APR_SUCCESS;
373 }
374 }
375 if (ctx->bblen == -1) {
376 ctx->bblen = BUFLEN-1;
377 }
378
379 /* flatten it into a NULL-terminated string */
380 ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1));
381 ctx->bytes = (apr_size_t)ctx->bblen;
382 rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes);
383 ap_assert(rv == APR_SUCCESS);
384 ctx->buf[ctx->bytes] = 0;
385 sniff_encoding(f->r, ctx);
386
387 /* FIXME: hook here for rewriting start-of-data? */
388 /* nah, we only have one action here - call it inline */
389 fix_skipto(f->r, ctx);
390
391 /* we might change the Content-Length, so let's force its re-calculation */
392 apr_table_unset(f->r->headers_out, "Content-Length");
393
394 /* consume the data we just sniffed */
395 /* we need to omit any <meta> we just invalidated */
396 ctx->flags |= ENC_INITIALISED;
397 ap_set_module_config(f->r->request_config, &xml2enc_module, ctx);
398 }
399 if (ctx->bbnext == NULL) {
400 ctx->bbnext = apr_brigade_create(f->r->pool,
401 f->r->connection->bucket_alloc);
402 }
403
404 if (!ctx->convset) {
405 rv = ap_pass_brigade(f->next, ctx->bbsave);
406 apr_brigade_cleanup(ctx->bbsave);
407 ap_remove_output_filter(f);
408 return rv;
409 }
410 /* move the data back to bb */
411 APR_BRIGADE_CONCAT(bb, ctx->bbsave);
412
413 while (!APR_BRIGADE_EMPTY(bb)) {
414 b = APR_BRIGADE_FIRST(bb);
415 ctx->bytes = 0;
416 if (APR_BUCKET_IS_METADATA(b)) {
417 APR_BUCKET_REMOVE(b);
418 APR_BRIGADE_INSERT_TAIL(ctx->bbnext, b);
419 /* Besides FLUSH, aggregate meta buckets to send them at
420 * once below. This resource filter is over on EOS.
421 */
422 pending_meta = 1;
423 if (APR_BUCKET_IS_EOS(b)) {
424 ap_remove_output_filter(f);
425 APR_BRIGADE_CONCAT(ctx->bbnext, bb);
426 }
427 else if (!APR_BUCKET_IS_FLUSH(b)) {
428 continue;
429 }
430 }
431 if (pending_meta) {
432 pending_meta = 0;
433 /* passing meta bucket down the chain */
434 rv = ap_pass_brigade(f->next, ctx->bbnext);
435 apr_brigade_cleanup(ctx->bbnext);
436 if (rv != APR_SUCCESS) {
437 return rv;
438 }
439 continue;
440 }
441 /* data bucket */
442 {
443 char* buf;
444 apr_size_t bytes = 0;
445 char fixbuf[BUFLEN];
446 apr_bucket* bdestroy = NULL;
447 if (insz > 0) { /* we have dangling data. Flatten it. */
448 buf = fixbuf;
449 bytes = BUFLEN;
450 rv = apr_brigade_flatten(bb, buf, &bytes);
451 ap_assert(rv == APR_SUCCESS);
452 if (bytes == insz) {
453 /* this is only what we've already tried to convert.
454 * The brigade is exhausted.
455 * Save remaining data for next time round
456 */
457
458 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01437)
459 "xml2enc: Setting aside %" APR_SIZE_T_FMT
460 " unconverted bytes", bytes);
461 rv = ap_fflush(f->next, ctx->bbnext);
462 APR_BRIGADE_CONCAT(ctx->bbsave, bb);
463 APR_BRIGADE_DO(b, ctx->bbsave) {
464 ap_assert(apr_bucket_setaside(b, f->r->pool)
465 == APR_SUCCESS);
466 }
467 return rv;
468 }
469 /* remove the data we've just read */
470 rv = apr_brigade_partition(bb, bytes, &bstart);
471 while (b = APR_BRIGADE_FIRST(bb), b != bstart) {
472 apr_bucket_delete(b);
473 }
474 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01438)
475 "xml2enc: consuming %" APR_SIZE_T_FMT
476 " bytes flattened", bytes);
477 }
478 else {
479 rv = apr_bucket_read(b, (const char**)&buf, &bytes,
480 APR_BLOCK_READ);
481 APR_BUCKET_REMOVE(b);
482 bdestroy = b; /* can't destroy until finished with the data */
483 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01439)
484 "xml2enc: consuming %" APR_SIZE_T_FMT
485 " bytes from bucket", bytes);
486 }
487 /* OK, we've got some input we can use in [buf,bytes] */
488 if (rv == APR_SUCCESS) {
489 apr_size_t consumed;
490 xml2enc_run_preprocess(f, &buf, &bytes);
491 consumed = insz = bytes;
492 while (insz > 0) {
493 apr_status_t rv2;
494 if (ctx->bytes == ctx->bblen) {
495 /* nothing was converted last time!
496 * break out of this loop!
497 */
498 b = apr_bucket_transient_create(buf+(bytes - insz), insz,
499 bb->bucket_alloc);
500 APR_BRIGADE_INSERT_HEAD(bb, b);
501 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01440)
502 "xml2enc: reinserting %" APR_SIZE_T_FMT
503 " unconsumed bytes from bucket", insz);
504 break;
505 }
506 ctx->bytes = (apr_size_t)ctx->bblen;
507 rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz),
508 &insz, ctx->buf, &ctx->bytes);
509 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01441)
510 "xml2enc: converted %" APR_SIZE_T_FMT
511 "/%" APR_OFF_T_FMT " bytes", consumed - insz,
512 ctx->bblen - ctx->bytes);
513 consumed = insz;
514 rv2 = ap_fwrite(f->next, ctx->bbnext, ctx->buf,
515 (apr_size_t)ctx->bblen - ctx->bytes);
516 if (rv2 != APR_SUCCESS) {
517 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv2, f->r, APLOGNO(01442)
518 "ap_fwrite failed");
519 return rv2;
520 }
521 switch (rv) {
522 case APR_SUCCESS:
523 continue;
524 case APR_EINCOMPLETE:
525 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01443)
526 "INCOMPLETE");
527 continue; /* If outbuf too small, go round again.
528 * If it was inbuf, we'll break out when
529 * we test ctx->bytes == ctx->bblen
530 */
531 case APR_EINVAL: /* try skipping one bad byte */
532 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01444)
533 "Skipping invalid byte(s) in input stream!");
534 --insz;
535 continue;
536 default:
537 /* Erk! What's this?
538 * Bail out, flush, and hope to eat the buf raw
539 */
540 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01445)
541 "Failed to convert input; trying it raw") ;
542 ctx->convset = NULL;
543 rv = ap_fflush(f->next, ctx->bbnext);
544 if (rv != APR_SUCCESS)
545 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01446)
546 "ap_fflush failed");
547 apr_brigade_cleanup(ctx->bbnext);
548 }
549 }
550 } else {
551 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01447)
552 "xml2enc: error reading data") ;
553 }
554 if (bdestroy)
555 apr_bucket_destroy(bdestroy);
556 if (rv != APR_SUCCESS)
557 return rv;
558 }
559 }
560 if (pending_meta) {
561 /* passing pending meta bucket down the chain before leaving */
562 rv = ap_pass_brigade(f->next, ctx->bbnext);
563 apr_brigade_cleanup(ctx->bbnext);
564 if (rv != APR_SUCCESS) {
565 return rv;
566 }
567 }
568
569 return APR_SUCCESS;
570 }
571
xml2enc_charset(request_rec * r,xmlCharEncoding * encp,const char ** encoding)572 static apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp,
573 const char** encoding)
574 {
575 xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module);
576 if (!ctx || !(ctx->flags & ENC_INITIALISED)) {
577 return APR_EAGAIN;
578 }
579 *encp = ctx->xml2enc;
580 *encoding = ctx->encoding;
581 return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL;
582 }
583
584 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
xml2enc_hooks(apr_pool_t * pool)585 static void xml2enc_hooks(apr_pool_t* pool)
586 {
587 ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc,
588 xml2enc_filter_init,
589 AP_FTYPE_RESOURCE, PROTO_FLAGS);
590 APR_REGISTER_OPTIONAL_FN(xml2enc_filter);
591 APR_REGISTER_OPTIONAL_FN(xml2enc_charset);
592 seek_meta_ctype = ap_pregcomp(pool,
593 "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
594 AP_REG_EXTENDED|AP_REG_ICASE) ;
595 seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
596 AP_REG_EXTENDED|AP_REG_ICASE) ;
597 }
/*
 * Handler for xml2EncAlias: register alias as another name for charset
 * with libxml2.  Accepted in the global context only.
 * Returns NULL on success, otherwise an error message.
 */
static const char* set_alias(cmd_parms* cmd, void* CFG,
                             const char* charset, const char* alias)
{
    const char* context_error = ap_check_cmd_context(cmd, GLOBAL_ONLY);

    if (context_error != NULL) {
        return context_error;
    }
    if (xmlAddEncodingAlias(charset, alias) != 0) {
        return "Error setting charset alias";
    }
    return NULL;
}
609
/*
 * Handler for xml2EncDefault: store the charset name and its libxml2
 * encoding id in the per-directory config.
 * Returns NULL on success, or an error message when libxml2 does not
 * recognise the charset.
 */
static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset)
{
    xml2cfg* conf = CFG;

    conf->default_charset = charset;
    conf->default_encoding = xmlParseCharEncoding(charset);

    if (conf->default_encoding == XML_CHAR_ENCODING_NONE) {
        return "Default charset not found";
    }
    if (conf->default_encoding == XML_CHAR_ENCODING_ERROR) {
        return "Invalid or unsupported default charset";
    }
    return NULL;
}
/*
 * Handler for xml2StartParse, called once per argument: append arg to the
 * per-directory skipto array, creating the array on first use.
 * Always returns NULL.
 */
static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg)
{
    xml2cfg* conf = CFG;
    tattr* slot;

    if (!conf->skipto) {
        conf->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
    }
    slot = apr_array_push(conf->skipto);
    slot->val = arg;
    return NULL;
}
634
635 static const command_rec xml2enc_cmds[] = {
636 AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
637 "Usage: xml2EncDefault charset"),
638 AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF,
639 "EncodingAlias charset alias [more aliases]"),
640 AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
641 "Ignore anything in front of the first of these elements"),
642 { NULL }
643 };
/*
 * Create a per-directory config: everything zeroed, no default encoding.
 * The second argument is not used.
 */
static void* xml2enc_config(apr_pool_t* pool, char* x)
{
    xml2cfg* fresh = apr_pcalloc(pool, sizeof(xml2cfg));

    fresh->default_encoding = XML_CHAR_ENCODING_NONE ;
    return fresh;
}
650
/*
 * Merge two per-directory configs into a new one allocated from pool.
 * For each setting the value from ADD is used when it is set, the value
 * from BASE otherwise.
 */
static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD)
{
    xml2cfg* parent = BASE;
    xml2cfg* child = ADD;
    xml2cfg* merged = apr_pcalloc(pool, sizeof(xml2cfg));

    if (child->default_encoding == XML_CHAR_ENCODING_NONE) {
        merged->default_encoding = parent->default_encoding;
    }
    else {
        merged->default_encoding = child->default_encoding;
    }

    if (child->default_charset) {
        merged->default_charset = child->default_charset;
    }
    else {
        merged->default_charset = parent->default_charset;
    }

    if (child->skipto) {
        merged->skipto = child->skipto;
    }
    else {
        merged->skipto = parent->skipto;
    }
    return merged;
}
663
664 AP_DECLARE_MODULE(xml2enc) = {
665 STANDARD20_MODULE_STUFF,
666 xml2enc_config,
667 xml2enc_merge,
668 NULL,
669 NULL,
670 xml2enc_cmds,
671 xml2enc_hooks
672 };
673
674 APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(xml2enc, XML2ENC, int, preprocess,
675 (ap_filter_t *f, char** bufp, apr_size_t* bytesp),
676 (f, bufp, bytesp), OK, DECLINED)
677