1 /*      Copyright (c) 2007-11, WebThing Ltd
2  *      Copyright (c) 2011-, The Apache Software Foundation
3  *
4  * Licensed to the Apache Software Foundation (ASF) under one or more
5  * contributor license agreements.  See the NOTICE file distributed with
6  * this work for additional information regarding copyright ownership.
7  * The ASF licenses this file to You under the Apache License, Version 2.0
8  * (the "License"); you may not use this file except in compliance with
9  * the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 
20 #if defined(WIN32)
21 #define XML2ENC_DECLARE_EXPORT
22 #endif
23 
24 #include <ctype.h>
25 
26 /* libxml2 includes unicode/[...].h files which uses C++ comments */
27 #if defined(__clang__)
28 #pragma clang diagnostic push
29 #pragma clang diagnostic warning "-Wcomment"
30 #elif defined(__GNUC__)
31 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
32 #pragma GCC diagnostic push
33 #pragma GCC diagnostic warning "-Wcomment"
34 #endif
35 #endif
36 
37 /* libxml2 */
38 #include <libxml/encoding.h>
39 
40 #if defined(__clang__)
41 #pragma clang diagnostic pop
42 #elif defined(__GNUC__)
43 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
44 #pragma GCC diagnostic pop
45 #endif
46 #endif
47 
48 #include "http_protocol.h"
49 #include "http_config.h"
50 #include "http_log.h"
51 #include "apr_strings.h"
52 #include "apr_xlate.h"
53 
54 #include "apr_optional.h"
55 #include "mod_xml2enc.h"
56 
57 module AP_MODULE_DECLARE_DATA xml2enc_module;
58 
/* Size of the conversion output buffer set up by xml2enc_filter(), and of
 * the stack scratch buffer the filter uses to flatten dangling input.
 */
#define BUFLEN 8192
/* The filter waits until at least this much data (or EOS) has been saved
 * before it tries to sniff the charset.
 */
#define BUF_MIN 4096
/* Iterate bucket pointer b over every bucket in brigade bb. */
#define APR_BRIGADE_DO(b,bb) for (b = APR_BRIGADE_FIRST(bb); \
                                  b != APR_BRIGADE_SENTINEL(bb); \
                                  b = APR_BUCKET_NEXT(b))

/* Bits kept in xml2ctx.flags */
#define ENC_INITIALISED 0x100   /* sniffing done / converter already set up */
#define ENC_SEEN_EOS 0x200      /* EOS seen while collecting data to sniff */
#define ENC_SKIPTO ENCIO_SKIPTO /* fix_skipto() should act on this request */

/* True when enc is neither XML_CHAR_ENCODING_NONE nor XML_CHAR_ENCODING_ERROR */
#define HAVE_ENCODING(enc) \
        (((enc)!=XML_CHAR_ENCODING_NONE)&&((enc)!=XML_CHAR_ENCODING_ERROR))
71 
72 /*
73  * XXX: Check all those ap_assert()s and replace those that should not happen
74  * XXX: with AP_DEBUG_ASSERT and those that may happen with proper error
75  * XXX: handling.
76  */
/* Per-request state of the xml2enc output filter. */
typedef struct {
    xmlCharEncoding xml2enc;     /* libxml2 encoding id reported by
                                  * xml2enc_charset() */
    char* buf;                   /* first the flattened start of the body used
                                  * for sniffing, later the output buffer
                                  * handed to apr_xlate_conv_buffer() */
    apr_size_t bytes;            /* valid bytes in buf while sniffing; space
                                  * left in buf after a conversion call */
    apr_xlate_t* convset;        /* charset converter, NULL when the data is
                                  * passed through unconverted */
    unsigned int flags;          /* ENC_* bits */
    apr_off_t bblen;             /* length of bbsave when sniffing; used as the
                                  * capacity of buf when converting */
    apr_bucket_brigade* bbnext;  /* output being assembled for the next filter */
    apr_bucket_brigade* bbsave;  /* data set aside between filter invocations */
    const char* encoding;        /* charset name reported by
                                  * xml2enc_charset() */
} xml2ctx;
88 
/* Per-directory configuration. */
typedef struct {
    const char* default_charset;      /* name given to xml2EncDefault, or NULL */
    xmlCharEncoding default_encoding; /* default_charset as parsed by libxml2;
                                       * XML_CHAR_ENCODING_NONE when unset */
    apr_array_header_t* skipto;       /* array of tattr filled by
                                       * xml2StartParse, or NULL */
} xml2cfg;
94 
/* One entry of xml2cfg.skipto: an element name compared (case-insensitively)
 * against the text following each '<' in fix_skipto().
 */
typedef struct {
    const char* val;
} tattr;
98 
99 static ap_regex_t* seek_meta_ctype;
100 static ap_regex_t* seek_charset;
101 
xml2enc_filter(request_rec * r,const char * enc,unsigned int mode)102 static apr_status_t xml2enc_filter(request_rec* r, const char* enc,
103                                    unsigned int mode)
104 {
105     /* set up a ready-initialised ctx to convert to enc, and insert filter */
106     apr_xlate_t* convset;
107     apr_status_t rv;
108     unsigned int flags = (mode ^ ENCIO);
109     if ((mode & ENCIO) == ENCIO_OUTPUT) {
110         rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool);
111         flags |= ENC_INITIALISED;
112     }
113     else if ((mode & ENCIO) == ENCIO_INPUT) {
114         rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool);
115         flags |= ENC_INITIALISED;
116     }
117     else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) {
118         convset = NULL;
119         rv = APR_SUCCESS; /* we'll initialise later by sniffing */
120     }
121     else {
122         rv = APR_EGENERAL;
123         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01426)
124                       "xml2enc: bad mode %x", mode);
125     }
126     if (rv == APR_SUCCESS) {
127         xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx));
128         ctx->flags = flags;
129         if (flags & ENC_INITIALISED) {
130             ctx->convset = convset;
131             ctx->bblen = BUFLEN;
132             ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen);
133         }
134         ap_add_output_filter("xml2enc", ctx, r, r->connection);
135     }
136     else {
137         ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01427)
138                       "xml2enc: Charset %s not supported.", enc) ;
139     }
140     return rv;
141 }
142 
143 /* This needs to operate only when we're using htmlParser */
144 /* Different modules may apply different rules here.  Ho, hum.  */
/*
 * Discard everything in the saved brigade that precedes the first element
 * listed with xml2StartParse.
 *
 * Does nothing unless the directory config has a skipto list and the
 * context carries ENC_SKIPTO.  ctx->buf must be the NUL-terminated flattened
 * copy of the start of ctx->bbsave, so that an offset in buf is also an
 * offset in the brigade.  Logs a warning when no listed element is found.
 */
static void fix_skipto(request_rec* r, xml2ctx* ctx)
{
    xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module);
    tattr* starts;
    char* p;
    int found = 0;

    if ((cfg->skipto == NULL) || !(ctx->flags & ENC_SKIPTO)) {
        return;
    }

    starts = (tattr*) cfg->skipto->elts;
    p = ap_strchr(ctx->buf, '<');
    while (p != NULL) {
        int i;
        for (i = 0; i < cfg->skipto->nelts; ++i) {
            if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
                /* found a starting element. Strip all that comes before. */
                apr_bucket* b;
                apr_bucket* bstart;
                apr_status_t rv = apr_brigade_partition(ctx->bbsave,
                                                        (p-ctx->buf), &bstart);
                ap_assert(rv == APR_SUCCESS);
                while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) {
                    apr_bucket_delete(b);
                }
                /* Only the brigade is trimmed.  ctx->buf is deliberately NOT
                 * advanced: the filter reuses it as a conversion output
                 * buffer of ctx->bblen bytes, so moving the pointer into the
                 * allocation would let the converter write past its end.
                 */
                found = 1;
                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01428)
                              "Skipped to first <%s> element",
                              starts[i].val) ;
                break;
            }
        }
        if (found) {
            break;
        }
        p = ap_strchr(p+1, '<');
    }

    /* Test the flag, not the scan pointer: the pointer also ends up NULL
     * after a successful match on the last '<' in the buffer.
     */
    if (!found) {
        ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01429)
                      "Failed to find start of recognised HTML!");
    }
}
/*
 * Work out the charset of the response body held in ctx->buf / ctx->bbsave.
 *
 * Sources tried, in order: a charset= parameter in the Content-Type header,
 * libxml2's detection on the start of the body, and an HTML
 * <meta http-equiv="content-type"> element (which is always cut out of the
 * saved brigade when present).  If libxml2 does not know the result, an
 * apr_xlate converter to UTF-8 is opened instead; failing that the configured
 * (or ISO-8859-1) default is used.  Finally the Content-Type header is
 * rewritten to announce utf-8.
 *
 * Results are left in ctx->xml2enc, ctx->encoding and ctx->convset.
 */
static void sniff_encoding(request_rec* r, xml2ctx* ctx)
{
    xml2cfg* conf = NULL; /* set before use whenever no encoding is known */
    ap_regmatch_t m[2];
    const char* ctype = r->content_type;

    /* 1. HTTP header: if the charset is there, take it */
    if (ctype) {
        char* param;

        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01430)
                      "Content-Type is %s", ctype);

        param = ap_strcasestr(ctype, "charset=");
        if (param != NULL) {
            param += 8;
            ctx->encoding = apr_pstrndup(r->pool, param,
                                         strcspn(param, " ;"));
            if (ctx->encoding) {
                ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01431)
                              "Got charset %s from HTTP headers", ctx->encoding);
                ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
            }
        }
    }

    /* 2. nothing yet: let libxml2 look at the first bytes (BOM etc.) */
    if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) {
        ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf,
                                             ctx->bytes);
        if (HAVE_ENCODING(ctx->xml2enc)) {
            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01432)
                          "Got charset from XML rules.");
            ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc);
        }
    }

    /* 3. HTML META.  It is removed from the output whether or not we need
     * it, because the charset it states is about to become wrong.
     */
    if (ap_regexec(seek_meta_ctype, ctx->buf, 1, m, 0) == 0) {
        apr_bucket* cut_from;
        apr_bucket* cut_to;
        apr_status_t rv;

        /* bucket boundaries at the end, then the start, of the element */
        rv = apr_brigade_partition(ctx->bbsave, m[0].rm_eo, &cut_to);
        ap_assert(rv == APR_SUCCESS);
        rv = apr_brigade_partition(ctx->bbsave, m[0].rm_so, &cut_from);
        ap_assert(rv == APR_SUCCESS);

        /* the useful part of buf now ends where the element began */
        ctx->bytes = m[0].rm_so;

        if (ctx->encoding == NULL) {
            char* meta = apr_pstrndup(r->pool, ctx->buf + m[0].rm_so,
                                      m[0].rm_eo - m[0].rm_so);
            /* from here on m[] describes offsets inside meta */
            if (ap_regexec(seek_charset, meta, 2, m, 0) == 0) {
                ctx->encoding = apr_pstrndup(r->pool, meta + m[1].rm_so,
                                             m[1].rm_eo - m[1].rm_so);
                if (ctx->encoding) {
                    ctx->xml2enc = xmlParseCharEncoding(ctx->encoding);
                    if (HAVE_ENCODING(ctx->xml2enc)) {
                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01433)
                                      "Got charset %s from HTML META", ctx->encoding);
                    }
                }
            }
        }

        /* drop the buckets that make up the element */
        while (cut_from != cut_to) {
            apr_bucket* following = APR_BUCKET_NEXT(cut_from);
            apr_bucket_delete(cut_from);
            cut_from = following;
        }
        /* keep buf a valid string of ctx->bytes characters */
        ctx->buf[ctx->bytes] = 0;
    }

    /* 4. libxml2 cannot handle what we have (or we have nothing): try to
     * get a converter to UTF-8 from apr_xlate instead.
     */
    if (!HAVE_ENCODING(ctx->xml2enc)) {
        conf = ap_get_module_config(r->per_dir_config, &xml2enc_module);
        if (!ctx->encoding) {
            if (conf->default_charset) {
                ctx->encoding = conf->default_charset;
            }
            else {
                ctx->encoding = "ISO-8859-1";
            }
        }
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01434)
                      "Charset %s not supported by libxml2; trying apr_xlate",
                      ctx->encoding);
        if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool)
            != APR_SUCCESS) {
            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01435)
                          "Charset %s not supported.  Consider aliasing it?",
                          ctx->encoding);
        }
        else {
            /* the filter will convert, so consumers get UTF-8 */
            ctx->xml2enc = XML_CHAR_ENCODING_UTF8;
        }
    }

    /* 5. still nothing usable: fall back on the configured default.
     * conf was fetched in step 4, which always runs before this branch.
     */
    if (!HAVE_ENCODING(ctx->xml2enc)) {
        ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01436)
                  "No usable charset information; using configuration default");
        if (conf->default_encoding == XML_CHAR_ENCODING_NONE) {
            ctx->xml2enc = XML_CHAR_ENCODING_8859_1;
        }
        else {
            ctx->xml2enc = conf->default_encoding;
        }
    }

    /* 6. make the Content-Type header say utf-8 */
    if (ctype && ctx->encoding) {
        if (ap_regexec(seek_charset, ctype, 2, m, 0) != 0) {
            /* no charset parameter present: append one */
            r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8",
                                          NULL);
        }
        else {
            /* replace the value of the existing parameter: the whole match
             * is swapped for the 13 characters of "charset=utf-8"
             */
            char* rewritten = apr_palloc(r->pool, strlen(r->content_type) + 13
                                         - (m[0].rm_eo - m[0].rm_so) + 1);
            memcpy(rewritten, r->content_type, m[1].rm_so);
            memcpy(rewritten + m[1].rm_so, "utf-8", 5);
            strcpy(rewritten + m[1].rm_so + 5, r->content_type + m[1].rm_eo);
            r->content_type = rewritten;
        }
    }
}
303 
xml2enc_filter_init(ap_filter_t * f)304 static apr_status_t xml2enc_filter_init(ap_filter_t* f)
305 {
306     xml2ctx* ctx;
307     if (!f->ctx) {
308         xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config,
309                                             &xml2enc_module);
310         f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx));
311         ctx->xml2enc = XML_CHAR_ENCODING_NONE;
312         if (cfg->skipto != NULL) {
313             ctx->flags |= ENC_SKIPTO;
314         }
315     }
316     return APR_SUCCESS;
317 }
xml2enc_ffunc(ap_filter_t * f,apr_bucket_brigade * bb)318 static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb)
319 {
320     xml2ctx* ctx = f->ctx;
321     apr_status_t rv;
322     apr_bucket* b;
323     apr_bucket* bstart;
324     apr_size_t insz = 0;
325     int pending_meta = 0;
326     char *ctype;
327     char *p;
328 
329     if (!ctx || !f->r->content_type) {
330         /* log error about configuring this */
331         ap_remove_output_filter(f);
332         return ap_pass_brigade(f->next, bb) ;
333     }
334 
335     ctype = apr_pstrdup(f->r->pool, f->r->content_type);
336     for (p = ctype; *p; ++p)
337         if (isupper(*p))
338             *p = tolower(*p);
339 
340     /* only act if starts-with "text/" or contains "xml" */
341     if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml"))  {
342         ap_remove_output_filter(f);
343         return ap_pass_brigade(f->next, bb) ;
344     }
345 
346     if (ctx->bbsave == NULL) {
347         ctx->bbsave = apr_brigade_create(f->r->pool,
348                                          f->r->connection->bucket_alloc);
349     }
350     /* append to any data left over from last time */
351     APR_BRIGADE_CONCAT(ctx->bbsave, bb);
352 
353     if (!(ctx->flags & ENC_INITIALISED)) {
354         /* some kind of initialisation required */
355         /* Turn all this off when post-processing */
356 
357         /* if we don't have enough data to sniff but more's to come, wait */
358         apr_brigade_length(ctx->bbsave, 0, &ctx->bblen);
359         if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) {
360             APR_BRIGADE_DO(b, ctx->bbsave) {
361                 if (APR_BUCKET_IS_EOS(b)) {
362                     ctx->flags |= ENC_SEEN_EOS;
363                     break;
364                 }
365             }
366             if (!(ctx->flags & ENC_SEEN_EOS)) {
367                 /* not enough data to sniff.  Wait for more */
368                 APR_BRIGADE_DO(b, ctx->bbsave) {
369                     rv = apr_bucket_setaside(b, f->r->pool);
370                     ap_assert(rv == APR_SUCCESS);
371                 }
372                 return APR_SUCCESS;
373             }
374         }
375         if (ctx->bblen == -1) {
376             ctx->bblen = BUFLEN-1;
377         }
378 
379         /* flatten it into a NULL-terminated string */
380         ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1));
381         ctx->bytes = (apr_size_t)ctx->bblen;
382         rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes);
383         ap_assert(rv == APR_SUCCESS);
384         ctx->buf[ctx->bytes] = 0;
385         sniff_encoding(f->r, ctx);
386 
387         /* FIXME: hook here for rewriting start-of-data? */
388         /* nah, we only have one action here - call it inline */
389         fix_skipto(f->r, ctx);
390 
391         /* we might change the Content-Length, so let's force its re-calculation */
392         apr_table_unset(f->r->headers_out, "Content-Length");
393 
394         /* consume the data we just sniffed */
395         /* we need to omit any <meta> we just invalidated */
396         ctx->flags |= ENC_INITIALISED;
397         ap_set_module_config(f->r->request_config, &xml2enc_module, ctx);
398     }
399     if (ctx->bbnext == NULL) {
400         ctx->bbnext = apr_brigade_create(f->r->pool,
401                                          f->r->connection->bucket_alloc);
402     }
403 
404     if (!ctx->convset) {
405         rv = ap_pass_brigade(f->next, ctx->bbsave);
406         apr_brigade_cleanup(ctx->bbsave);
407         ap_remove_output_filter(f);
408         return rv;
409     }
410     /* move the data back to bb */
411     APR_BRIGADE_CONCAT(bb, ctx->bbsave);
412 
413     while (!APR_BRIGADE_EMPTY(bb)) {
414         b = APR_BRIGADE_FIRST(bb);
415         ctx->bytes = 0;
416         if (APR_BUCKET_IS_METADATA(b)) {
417             APR_BUCKET_REMOVE(b);
418             APR_BRIGADE_INSERT_TAIL(ctx->bbnext, b);
419             /* Besides FLUSH, aggregate meta buckets to send them at
420              * once below. This resource filter is over on EOS.
421              */
422             pending_meta = 1;
423             if (APR_BUCKET_IS_EOS(b)) {
424                 ap_remove_output_filter(f);
425                 APR_BRIGADE_CONCAT(ctx->bbnext, bb);
426             }
427             else if (!APR_BUCKET_IS_FLUSH(b)) {
428                 continue;
429             }
430         }
431         if (pending_meta) {
432             pending_meta = 0;
433             /* passing meta bucket down the chain */
434             rv = ap_pass_brigade(f->next, ctx->bbnext);
435             apr_brigade_cleanup(ctx->bbnext);
436             if (rv != APR_SUCCESS) {
437                 return rv;
438             }
439             continue;
440         }
441         /* data bucket */
442         {
443             char* buf;
444             apr_size_t bytes = 0;
445             char fixbuf[BUFLEN];
446             apr_bucket* bdestroy = NULL;
447             if (insz > 0) { /* we have dangling data.  Flatten it. */
448                 buf = fixbuf;
449                 bytes = BUFLEN;
450                 rv = apr_brigade_flatten(bb, buf, &bytes);
451                 ap_assert(rv == APR_SUCCESS);
452                 if (bytes == insz) {
453                     /* this is only what we've already tried to convert.
454                      * The brigade is exhausted.
455                      * Save remaining data for next time round
456                      */
457 
458                     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01437)
459                                   "xml2enc: Setting aside %" APR_SIZE_T_FMT
460                                   " unconverted bytes", bytes);
461                     rv = ap_fflush(f->next, ctx->bbnext);
462                     APR_BRIGADE_CONCAT(ctx->bbsave, bb);
463                     APR_BRIGADE_DO(b, ctx->bbsave) {
464                         ap_assert(apr_bucket_setaside(b, f->r->pool)
465                                   == APR_SUCCESS);
466                     }
467                     return rv;
468                 }
469                 /* remove the data we've just read */
470                 rv = apr_brigade_partition(bb, bytes, &bstart);
471                 while (b = APR_BRIGADE_FIRST(bb), b != bstart) {
472                     apr_bucket_delete(b);
473                 }
474                 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01438)
475                               "xml2enc: consuming %" APR_SIZE_T_FMT
476                               " bytes flattened", bytes);
477             }
478             else {
479                 rv = apr_bucket_read(b, (const char**)&buf, &bytes,
480                                      APR_BLOCK_READ);
481                 APR_BUCKET_REMOVE(b);
482                 bdestroy = b;  /* can't destroy until finished with the data */
483                 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01439)
484                               "xml2enc: consuming %" APR_SIZE_T_FMT
485                               " bytes from bucket", bytes);
486             }
487             /* OK, we've got some input we can use in [buf,bytes] */
488             if (rv == APR_SUCCESS) {
489                 apr_size_t consumed;
490                 xml2enc_run_preprocess(f, &buf, &bytes);
491                 consumed = insz = bytes;
492                 while (insz > 0) {
493                     apr_status_t rv2;
494                     if (ctx->bytes == ctx->bblen) {
495                         /* nothing was converted last time!
496                          * break out of this loop!
497                          */
498                         b = apr_bucket_transient_create(buf+(bytes - insz), insz,
499                                                         bb->bucket_alloc);
500                         APR_BRIGADE_INSERT_HEAD(bb, b);
501                         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01440)
502                                       "xml2enc: reinserting %" APR_SIZE_T_FMT
503                                       " unconsumed bytes from bucket", insz);
504                         break;
505                     }
506                     ctx->bytes = (apr_size_t)ctx->bblen;
507                     rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz),
508                                                &insz, ctx->buf, &ctx->bytes);
509                     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01441)
510                                   "xml2enc: converted %" APR_SIZE_T_FMT
511                                   "/%" APR_OFF_T_FMT " bytes", consumed - insz,
512                                   ctx->bblen - ctx->bytes);
513                     consumed = insz;
514                     rv2 = ap_fwrite(f->next, ctx->bbnext, ctx->buf,
515                                     (apr_size_t)ctx->bblen - ctx->bytes);
516                     if (rv2 != APR_SUCCESS) {
517                         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv2, f->r, APLOGNO(01442)
518                                       "ap_fwrite failed");
519                         return rv2;
520                     }
521                     switch (rv) {
522                     case APR_SUCCESS:
523                         continue;
524                     case APR_EINCOMPLETE:
525                         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01443)
526                                       "INCOMPLETE");
527                         continue;     /* If outbuf too small, go round again.
528                                        * If it was inbuf, we'll break out when
529                                        * we test ctx->bytes == ctx->bblen
530                                        */
531                     case APR_EINVAL: /* try skipping one bad byte */
532                         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01444)
533                                    "Skipping invalid byte(s) in input stream!");
534                         --insz;
535                         continue;
536                     default:
537                         /* Erk!  What's this?
538                          * Bail out, flush, and hope to eat the buf raw
539                          */
540                         ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01445)
541                                       "Failed to convert input; trying it raw") ;
542                         ctx->convset = NULL;
543                         rv = ap_fflush(f->next, ctx->bbnext);
544                         if (rv != APR_SUCCESS)
545                             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01446)
546                                           "ap_fflush failed");
547                         apr_brigade_cleanup(ctx->bbnext);
548                     }
549                 }
550             } else {
551                 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01447)
552                               "xml2enc: error reading data") ;
553             }
554             if (bdestroy)
555                 apr_bucket_destroy(bdestroy);
556             if (rv != APR_SUCCESS)
557                 return rv;
558         }
559     }
560     if (pending_meta) {
561         /* passing pending meta bucket down the chain before leaving */
562         rv = ap_pass_brigade(f->next, ctx->bbnext);
563         apr_brigade_cleanup(ctx->bbnext);
564         if (rv != APR_SUCCESS) {
565             return rv;
566         }
567     }
568 
569     return APR_SUCCESS;
570 }
571 
xml2enc_charset(request_rec * r,xmlCharEncoding * encp,const char ** encoding)572 static apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp,
573                                     const char** encoding)
574 {
575     xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module);
576     if (!ctx || !(ctx->flags & ENC_INITIALISED)) {
577         return APR_EAGAIN;
578     }
579     *encp = ctx->xml2enc;
580     *encoding = ctx->encoding;
581     return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL;
582 }
583 
584 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
xml2enc_hooks(apr_pool_t * pool)585 static void xml2enc_hooks(apr_pool_t* pool)
586 {
587     ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc,
588                                        xml2enc_filter_init,
589                                        AP_FTYPE_RESOURCE, PROTO_FLAGS);
590     APR_REGISTER_OPTIONAL_FN(xml2enc_filter);
591     APR_REGISTER_OPTIONAL_FN(xml2enc_charset);
592     seek_meta_ctype = ap_pregcomp(pool,
593                        "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
594                                   AP_REG_EXTENDED|AP_REG_ICASE) ;
595     seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
596                                AP_REG_EXTENDED|AP_REG_ICASE) ;
597 }
/*
 * Handler for xml2EncAlias: registers alias as another name for charset
 * with libxml2.  Only accepted in the global server context.
 * Returns NULL on success or an error message.
 */
static const char* set_alias(cmd_parms* cmd, void* CFG,
                             const char* charset, const char* alias)
{
    const char* context_err = ap_check_cmd_context(cmd, GLOBAL_ONLY);

    if (context_err != NULL) {
        return context_err;
    }
    if (xmlAddEncodingAlias(charset, alias) != 0) {
        return "Error setting charset alias";
    }
    return NULL;
}
609 
/*
 * Handler for xml2EncDefault: stores the charset name and its libxml2
 * encoding id in the directory config.  Both are stored even when the name
 * is rejected.  Returns NULL on success or an error message when libxml2
 * does not recognise the charset.
 */
static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset)
{
    xml2cfg* conf = CFG;
    xmlCharEncoding parsed = xmlParseCharEncoding(charset);

    conf->default_charset = charset;
    conf->default_encoding = parsed;

    if (parsed == XML_CHAR_ENCODING_NONE) {
        return "Default charset not found";
    }
    if (parsed == XML_CHAR_ENCODING_ERROR) {
        return "Invalid or unsupported default charset";
    }
    return NULL;
}
/*
 * Handler for xml2StartParse (called once per argument): appends the
 * element name to the directory's skipto list, creating the list on first
 * use.  Always returns NULL.
 */
static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg)
{
    xml2cfg* conf = CFG;
    tattr* slot;

    if (conf->skipto == NULL) {
        conf->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
    }
    slot = apr_array_push(conf->skipto);
    slot->val = arg;
    return NULL;
}
634 
635 static const command_rec xml2enc_cmds[] = {
636     AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL,
637                   "Usage: xml2EncDefault charset"),
638     AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF,
639                      "EncodingAlias charset alias [more aliases]"),
640     AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL,
641                     "Ignore anything in front of the first of these elements"),
642     { NULL }
643 };
/*
 * Create a per-directory config: no default charset, no skipto list, and
 * the default encoding explicitly marked as not set.
 */
static void* xml2enc_config(apr_pool_t* pool, char* x)
{
    xml2cfg* conf = apr_pcalloc(pool, sizeof(xml2cfg));

    conf->default_encoding = XML_CHAR_ENCODING_NONE;
    return conf;
}
650 
/*
 * Merge two per-directory configs: every setting present in ADD wins,
 * anything it leaves unset is inherited from BASE.
 */
static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD)
{
    xml2cfg* parent = BASE;
    xml2cfg* child = ADD;
    xml2cfg* merged = apr_pcalloc(pool, sizeof(xml2cfg));

    if (child->default_encoding == XML_CHAR_ENCODING_NONE) {
        merged->default_encoding = parent->default_encoding;
    }
    else {
        merged->default_encoding = child->default_encoding;
    }

    if (child->default_charset) {
        merged->default_charset = child->default_charset;
    }
    else {
        merged->default_charset = parent->default_charset;
    }

    if (child->skipto) {
        merged->skipto = child->skipto;
    }
    else {
        merged->skipto = parent->skipto;
    }

    return merged;
}
663 
/* Module record: per-directory config only, no per-server config. */
AP_DECLARE_MODULE(xml2enc) = {
    STANDARD20_MODULE_STUFF,
    xml2enc_config,     /* create per-directory config */
    xml2enc_merge,      /* merge per-directory configs */
    NULL,
    NULL,
    xml2enc_cmds,       /* directive table */
    xml2enc_hooks       /* register filter, optional functions, regexes */
};

/* Implements the optional "preprocess" hook that xml2enc_ffunc() runs on
 * each input buffer (via xml2enc_run_preprocess) before converting it.
 */
APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(xml2enc, XML2ENC, int, preprocess,
                      (ap_filter_t *f, char** bufp, apr_size_t* bytesp),
                      (f, bufp, bytesp), OK, DECLINED)
677