1 /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
2  * applicable.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "apr.h"
18 #include "apr_strings.h"
19 
20 #define APR_WANT_STDIO          /* for sprintf() */
21 #define APR_WANT_STRFUNC
22 #include "apr_want.h"
23 
24 #include "apr_xml.h"
25 
26 #include "apu_config.h"
27 
28 #ifdef APR_HAVE_OLD_EXPAT
29 #include "xmlparse.h"
30 #else
31 #include "expat.h"
32 #endif
33 
/* line terminator appended to the tags built by apr_xml_empty_elem() */
#define DEBUG_CR "\r\n"

/*
 * Keyword strings spelled out as explicit byte values rather than as
 * string literals. Read as ASCII they are "xmlns", "xml:lang" and "DAV:".
 * NOTE(review): presumably written this way so the bytes stay ASCII when
 * the file is compiled on an EBCDIC host (see the APR_CHARSET_EBCDIC
 * section at the end of the file) -- confirm.
 */
static const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' };
/* despite the identifier, this is "xml:lang", not "xmlns:lang" */
static const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' };
/* namespace URI that apr_xml_parser_create() pre-inserts into every doc */
static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };

/*
 * Errors related to namespace processing. These are stored in
 * apr_xml_parser.error by start_handler()/find_prefix() and turned into
 * messages by apr_xml_parser_geterror().
 */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)
43 
/*
 * Test for a name that begins with [Xx][Mm][Ll] (compared as ASCII byte
 * values). Evaluates to non-zero when the first three characters match.
 *
 * The comparison short-circuits, so a string shorter than three characters
 * is never read past its terminating NUL.
 *
 * The argument is parenthesized so that pointer expressions such as
 * "p + 1" index correctly. It is still evaluated more than once, so do
 * not pass an expression with side effects.
 */
#define APR_XML_NS_IS_RESERVED(name) \
	( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
	  ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
	  ((name)[2] == 0x4C || (name)[2] == 0x6C) )
49 
50 
/*
 * The real (internal) definition of the parser context.
 *
 * One instance is created by apr_xml_parser_create() and handed to Expat
 * as the user-data pointer, so every handler in this file receives it.
 */
struct apr_xml_parser {
    apr_xml_doc *doc;		/* the doc we're parsing (allocated from p) */
    apr_pool_t *p;		/* the pool we allocate from */
    apr_xml_elem *cur_elem;	/* innermost element still open; NULL before
				 * the root starts and after it ends */

    int error;			/* non-zero once an error has occurred; the
				 * handlers do nothing once it is set, and
				 * apr_xml_parser_geterror() resets it to 0 */
#define APR_XML_ERROR_EXPAT             1
#define APR_XML_ERROR_PARSE_DONE        2
/* also: public APR_XML_NS_ERROR_* values (if any) */

    XML_Parser xp;              /* the actual (Expat) XML parser; freed and
                                 * set to NULL by cleanup_parser() */
    enum XML_Error xp_err;      /* Expat error code stored by do_parse()
                                 * when XML_Parse() reports failure */
};
65 
/*
 * Struct for scoping namespace declarations.
 *
 * start_handler() builds one of these per xmlns / xmlns:foo attribute and
 * pushes it onto the front of the declaring element's ns_scope list;
 * find_prefix() walks those lists from the current element up to the root.
 */
typedef struct apr_xml_ns_scope {
    const char *prefix;		/* prefix used for this ns ("" for the
				 * default namespace declared by xmlns=) */
    int ns;			/* index into namespace table */
    int emptyURI;		/* the namespace URI is the empty string */
    struct apr_xml_ns_scope *next;	/* next scoped namespace */
} apr_xml_ns_scope;
73 
74 
75 /* return namespace table index for a given prefix */
find_prefix(apr_xml_parser * parser,const char * prefix)76 static int find_prefix(apr_xml_parser *parser, const char *prefix)
77 {
78     apr_xml_elem *elem = parser->cur_elem;
79 
80     /*
81     ** Walk up the tree, looking for a namespace scope that defines this
82     ** prefix.
83     */
84     for (; elem; elem = elem->parent) {
85 	apr_xml_ns_scope *ns_scope;
86 
87 	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
88 	    if (strcmp(prefix, ns_scope->prefix) == 0) {
89 		if (ns_scope->emptyURI) {
90 		    /*
91 		    ** It is possible to set the default namespace to an
92 		    ** empty URI string; this resets the default namespace
93 		    ** to mean "no namespace." We just found the prefix
94 		    ** refers to an empty URI, so return "no namespace."
95 		    */
96 		    return APR_XML_NS_NONE;
97 		}
98 
99 		return ns_scope->ns;
100 	    }
101 	}
102     }
103 
104     /*
105      * If the prefix is empty (""), this means that a prefix was not
106      * specified in the element/attribute. The search that was performed
107      * just above did not locate a default namespace URI (which is stored
108      * into ns_scope with an empty prefix). This means the element/attribute
109      * has "no namespace". We have a reserved value for this.
110      */
111     if (*prefix == '\0') {
112 	return APR_XML_NS_NONE;
113     }
114 
115     /* not found */
116     return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
117 }
118 
start_handler(void * userdata,const char * name,const char ** attrs)119 static void start_handler(void *userdata, const char *name, const char **attrs)
120 {
121     apr_xml_parser *parser = userdata;
122     apr_xml_elem *elem;
123     apr_xml_attr *attr;
124     apr_xml_attr *prev;
125     char *colon;
126     const char *quoted;
127     char *elem_name;
128 
129     /* punt once we find an error */
130     if (parser->error)
131 	return;
132 
133     elem = apr_pcalloc(parser->p, sizeof(*elem));
134 
135     /* prep the element */
136     elem->name = elem_name = apr_pstrdup(parser->p, name);
137 
138     /* fill in the attributes (note: ends up in reverse order) */
139     while (*attrs) {
140 	attr = apr_palloc(parser->p, sizeof(*attr));
141 	attr->name = apr_pstrdup(parser->p, *attrs++);
142 	attr->value = apr_pstrdup(parser->p, *attrs++);
143 	attr->next = elem->attr;
144 	elem->attr = attr;
145     }
146 
147     /* hook the element into the tree */
148     if (parser->cur_elem == NULL) {
149 	/* no current element; this also becomes the root */
150 	parser->cur_elem = parser->doc->root = elem;
151     }
152     else {
153 	/* this element appeared within the current elem */
154 	elem->parent = parser->cur_elem;
155 
156 	/* set up the child/sibling links */
157 	if (elem->parent->last_child == NULL) {
158 	    /* no first child either */
159 	    elem->parent->first_child = elem->parent->last_child = elem;
160 	}
161 	else {
162 	    /* hook onto the end of the parent's children */
163 	    elem->parent->last_child->next = elem;
164 	    elem->parent->last_child = elem;
165 	}
166 
167 	/* this element is now the current element */
168 	parser->cur_elem = elem;
169     }
170 
171     /* scan the attributes for namespace declarations */
172     for (prev = NULL, attr = elem->attr;
173 	 attr;
174 	 attr = attr->next) {
175 	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
176 	    const char *prefix = &attr->name[5];
177 	    apr_xml_ns_scope *ns_scope;
178 
179 	    /* test for xmlns:foo= form and xmlns= form */
180 	    if (*prefix == 0x3A) {
181                 /* a namespace prefix declaration must have a
182                    non-empty value. */
183                 if (attr->value[0] == '\0') {
184                     parser->error = APR_XML_NS_ERROR_INVALID_DECL;
185                     return;
186                 }
187 		++prefix;
188             }
189 	    else if (*prefix != '\0') {
190 		/* advance "prev" since "attr" is still present */
191 		prev = attr;
192 		continue;
193 	    }
194 
195 	    /* quote the URI before we ever start working with it */
196 	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);
197 
198 	    /* build and insert the new scope */
199 	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
200 	    ns_scope->prefix = prefix;
201 	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
202 	    ns_scope->emptyURI = *quoted == '\0';
203 	    ns_scope->next = elem->ns_scope;
204 	    elem->ns_scope = ns_scope;
205 
206 	    /* remove this attribute from the element */
207 	    if (prev == NULL)
208 		elem->attr = attr->next;
209 	    else
210 		prev->next = attr->next;
211 
212 	    /* Note: prev will not be advanced since we just removed "attr" */
213 	}
214 	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
215 	    /* save away the language (in quoted form) */
216 	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
217 
218 	    /* remove this attribute from the element */
219 	    if (prev == NULL)
220 		elem->attr = attr->next;
221 	    else
222 		prev->next = attr->next;
223 
224 	    /* Note: prev will not be advanced since we just removed "attr" */
225 	}
226 	else {
227 	    /* advance "prev" since "attr" is still present */
228 	    prev = attr;
229 	}
230     }
231 
232     /*
233     ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
234     ** language from the parent element (if present).
235     **
236     ** NOTE: elem_size() *depends* upon this pointer equality.
237     */
238     if (elem->lang == NULL && elem->parent != NULL)
239 	elem->lang = elem->parent->lang;
240 
241     /* adjust the element's namespace */
242     colon = strchr(elem_name, 0x3A);
243     if (colon == NULL) {
244 	/*
245 	 * The element is using the default namespace, which will always
246 	 * be found. Either it will be "no namespace", or a default
247 	 * namespace URI has been specified at some point.
248 	 */
249 	elem->ns = find_prefix(parser, "");
250     }
251     else if (APR_XML_NS_IS_RESERVED(elem->name)) {
252 	elem->ns = APR_XML_NS_NONE;
253     }
254     else {
255 	*colon = '\0';
256 	elem->ns = find_prefix(parser, elem->name);
257 	elem->name = colon + 1;
258 
259 	if (APR_XML_NS_IS_ERROR(elem->ns)) {
260 	    parser->error = elem->ns;
261 	    return;
262 	}
263     }
264 
265     /* adjust all remaining attributes' namespaces */
266     for (attr = elem->attr; attr; attr = attr->next) {
267         /*
268          * apr_xml_attr defines this as "const" but we dup'd it, so we
269          * know that we can change it. a bit hacky, but the existing
270          * structure def is best.
271          */
272         char *attr_name = (char *)attr->name;
273 
274 	colon = strchr(attr_name, 0x3A);
275 	if (colon == NULL) {
276 	    /*
277 	     * Attributes do NOT use the default namespace. Therefore,
278 	     * we place them into the "no namespace" category.
279 	     */
280 	    attr->ns = APR_XML_NS_NONE;
281 	}
282 	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
283 	    attr->ns = APR_XML_NS_NONE;
284 	}
285 	else {
286 	    *colon = '\0';
287 	    attr->ns = find_prefix(parser, attr->name);
288 	    attr->name = colon + 1;
289 
290 	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
291 		parser->error = attr->ns;
292 		return;
293 	    }
294 	}
295     }
296 }
297 
end_handler(void * userdata,const char * name)298 static void end_handler(void *userdata, const char *name)
299 {
300     apr_xml_parser *parser = userdata;
301 
302     /* punt once we find an error */
303     if (parser->error)
304 	return;
305 
306     /* pop up one level */
307     parser->cur_elem = parser->cur_elem->parent;
308 }
309 
cdata_handler(void * userdata,const char * data,int len)310 static void cdata_handler(void *userdata, const char *data, int len)
311 {
312     apr_xml_parser *parser = userdata;
313     apr_xml_elem *elem;
314     apr_text_header *hdr;
315     const char *s;
316 
317     /* punt once we find an error */
318     if (parser->error)
319 	return;
320 
321     elem = parser->cur_elem;
322     s = apr_pstrndup(parser->p, data, len);
323 
324     if (elem->last_child == NULL) {
325 	/* no children yet. this cdata follows the start tag */
326 	hdr = &elem->first_cdata;
327     }
328     else {
329 	/* child elements exist. this cdata follows the last child. */
330 	hdr = &elem->last_child->following_cdata;
331     }
332 
333     apr_text_append(parser->p, hdr, s);
334 }
335 
cleanup_parser(void * ctx)336 static apr_status_t cleanup_parser(void *ctx)
337 {
338     apr_xml_parser *parser = ctx;
339 
340     XML_ParserFree(parser->xp);
341     parser->xp = NULL;
342 
343     return APR_SUCCESS;
344 }
345 
apr_xml_parser_create(apr_pool_t * pool)346 APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
347 {
348     apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
349 
350     parser->p = pool;
351     parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
352 
353     parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
354 
355     /* ### is there a way to avoid hard-coding this? */
356     apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
357 
358     parser->xp = XML_ParserCreate(NULL);
359     if (parser->xp == NULL) {
360         (*apr_pool_abort_get(pool))(APR_ENOMEM);
361         return NULL;
362     }
363 
364     apr_pool_cleanup_register(pool, parser, cleanup_parser,
365                               apr_pool_cleanup_null);
366 
367     XML_SetUserData(parser->xp, parser);
368     XML_SetElementHandler(parser->xp, start_handler, end_handler);
369     XML_SetCharacterDataHandler(parser->xp, cdata_handler);
370 
371     return parser;
372 }
373 
do_parse(apr_xml_parser * parser,const char * data,apr_size_t len,int is_final)374 static apr_status_t do_parse(apr_xml_parser *parser,
375                              const char *data, apr_size_t len,
376                              int is_final)
377 {
378     if (parser->xp == NULL) {
379         parser->error = APR_XML_ERROR_PARSE_DONE;
380     }
381     else {
382         int rv = XML_Parse(parser->xp, data, len, is_final);
383 
384         if (rv == 0) {
385             parser->error = APR_XML_ERROR_EXPAT;
386             parser->xp_err = XML_GetErrorCode(parser->xp);
387         }
388     }
389 
390     /* ### better error code? */
391     return parser->error ? APR_EGENERAL : APR_SUCCESS;
392 }
393 
apr_xml_parser_feed(apr_xml_parser * parser,const char * data,apr_size_t len)394 APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
395                                               const char *data,
396                                               apr_size_t len)
397 {
398     return do_parse(parser, data, len, 0 /* is_final */);
399 }
400 
apr_xml_parser_done(apr_xml_parser * parser,apr_xml_doc ** pdoc)401 APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
402                                               apr_xml_doc **pdoc)
403 {
404     char end;
405     apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
406 
407     /* get rid of the parser */
408     (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
409 
410     if (status)
411         return status;
412 
413     if (pdoc != NULL)
414         *pdoc = parser->doc;
415     return APR_SUCCESS;
416 }
417 
apr_xml_parser_geterror(apr_xml_parser * parser,char * errbuf,apr_size_t errbufsize)418 APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
419                                             char *errbuf,
420                                             apr_size_t errbufsize)
421 {
422     int error = parser->error;
423     const char *msg;
424 
425     /* clear our record of an error */
426     parser->error = 0;
427 
428     switch (error) {
429     case 0:
430         msg = "No error.";
431         break;
432 
433     case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
434         msg = "An undefined namespace prefix was used.";
435         break;
436 
437     case APR_XML_NS_ERROR_INVALID_DECL:
438         msg = "A namespace prefix was defined with an empty URI.";
439         break;
440 
441     case APR_XML_ERROR_EXPAT:
442         (void) apr_snprintf(errbuf, errbufsize,
443                             "XML parser error code: %s (%d)",
444                             XML_ErrorString(parser->xp_err), parser->xp_err);
445         return errbuf;
446 
447     case APR_XML_ERROR_PARSE_DONE:
448         msg = "The parser is not active.";
449         break;
450 
451     default:
452         msg = "There was an unknown error within the XML body.";
453         break;
454     }
455 
456     (void) apr_cpystrn(errbuf, msg, errbufsize);
457     return errbuf;
458 }
459 
apr_xml_parse_file(apr_pool_t * p,apr_xml_parser ** parser,apr_xml_doc ** ppdoc,apr_file_t * xmlfd,apr_size_t buffer_length)460 APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
461                                              apr_xml_parser **parser,
462                                              apr_xml_doc **ppdoc,
463                                              apr_file_t *xmlfd,
464                                              apr_size_t buffer_length)
465 {
466     apr_status_t rv;
467     char *buffer;
468     apr_size_t length;
469 
470     *parser = apr_xml_parser_create(p);
471     if (*parser == NULL) {
472         /* FIXME: returning an error code would be nice,
473          * but we dont get one ;( */
474         return APR_EGENERAL;
475     }
476     buffer = apr_palloc(p, buffer_length);
477     length = buffer_length;
478 
479     rv = apr_file_read(xmlfd, buffer, &length);
480 
481     while (rv == APR_SUCCESS) {
482         rv = apr_xml_parser_feed(*parser, buffer, length);
483         if (rv != APR_SUCCESS) {
484             return rv;
485         }
486 
487         length = buffer_length;
488         rv = apr_file_read(xmlfd, buffer, &length);
489     }
490     if (rv != APR_EOF) {
491         return rv;
492     }
493     rv = apr_xml_parser_done(*parser, ppdoc);
494     *parser = NULL;
495     return rv;
496 }
497 
apr_text_append(apr_pool_t * p,apr_text_header * hdr,const char * text)498 APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
499                                   const char *text)
500 {
501     apr_text *t = apr_palloc(p, sizeof(*t));
502 
503     t->text = text;
504     t->next = NULL;
505 
506     if (hdr->first == NULL) {
507 	/* no text elements yet */
508 	hdr->first = hdr->last = t;
509     }
510     else {
511 	/* append to the last text element */
512 	hdr->last->next = t;
513 	hdr->last = t;
514     }
515 }
516 
517 
518 /* ---------------------------------------------------------------
519 **
520 ** XML UTILITY FUNCTIONS
521 */
522 
523 /*
524 ** apr_xml_quote_string: quote an XML string
525 **
526 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
527 ** If quotes is true, then replace '"' with '&quot;'.
528 **
529 ** quotes is typically set to true for XML strings that will occur within
530 ** double quotes -- attribute values.
531 */
apr_xml_quote_string(apr_pool_t * p,const char * s,int quotes)532 APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
533                                                int quotes)
534 {
535     const char *scan;
536     apr_size_t len = 0;
537     apr_size_t extra = 0;
538     char *qstr;
539     char *qscan;
540     char c;
541 
542     for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
543 	if (c == '<' || c == '>')
544 	    extra += 3;		/* &lt; or &gt; */
545 	else if (c == '&')
546 	    extra += 4;		/* &amp; */
547 	else if (quotes && c == '"')
548 	    extra += 5;		/* &quot; */
549     }
550 
551     /* nothing to do? */
552     if (extra == 0)
553 	return s;
554 
555     qstr = apr_palloc(p, len + extra + 1);
556     for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
557 	if (c == '<') {
558 	    *qscan++ = '&';
559 	    *qscan++ = 'l';
560 	    *qscan++ = 't';
561 	    *qscan++ = ';';
562 	}
563 	else if (c == '>') {
564 	    *qscan++ = '&';
565 	    *qscan++ = 'g';
566 	    *qscan++ = 't';
567 	    *qscan++ = ';';
568 	}
569 	else if (c == '&') {
570 	    *qscan++ = '&';
571 	    *qscan++ = 'a';
572 	    *qscan++ = 'm';
573 	    *qscan++ = 'p';
574 	    *qscan++ = ';';
575 	}
576 	else if (quotes && c == '"') {
577 	    *qscan++ = '&';
578 	    *qscan++ = 'q';
579 	    *qscan++ = 'u';
580 	    *qscan++ = 'o';
581 	    *qscan++ = 't';
582 	    *qscan++ = ';';
583 	}
584 	else {
585 	    *qscan++ = c;
586 	}
587     }
588 
589     *qscan = '\0';
590     return qstr;
591 }
592 
/*
 * Number of characters "%d" produces for a non-negative namespace index
 * (1 for 0..9, 2 for 10..99, ... up to 10). Any value below 10, including
 * a negative one, yields 1. The argument is evaluated more than once.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) >= 1000000000 ? 10 : \
     (ns) >= 100000000  ?  9 : \
     (ns) >= 10000000   ?  8 : \
     (ns) >= 1000000    ?  7 : \
     (ns) >= 100000     ?  6 : \
     (ns) >= 10000      ?  5 : \
     (ns) >= 1000       ?  4 : \
     (ns) >= 100        ?  3 : \
     (ns) >= 10         ?  2 : 1)
598 
text_size(const apr_text * t)599 static apr_size_t text_size(const apr_text *t)
600 {
601     apr_size_t size = 0;
602 
603     for (; t; t = t->next)
604 	size += strlen(t->text);
605     return size;
606 }
607 
elem_size(const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map)608 static apr_size_t elem_size(const apr_xml_elem *elem, int style,
609                             apr_array_header_t *namespaces, int *ns_map)
610 {
611     apr_size_t size;
612 
613     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
614 	const apr_xml_attr *attr;
615 
616 	size = 0;
617 
618 	if (style == APR_XML_X2T_FULL_NS_LANG) {
619 	    int i;
620 
621 	    /*
622 	    ** The outer element will contain xmlns:ns%d="%s" attributes
623 	    ** and an xml:lang attribute, if applicable.
624 	    */
625 
626 	    for (i = namespaces->nelts; i--;) {
627 		/* compute size of: ' xmlns:ns%d="%s"' */
628 		size += (9 + APR_XML_NS_LEN(i) + 2 +
629 			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
630 	    }
631 
632 	    if (elem->lang != NULL) {
633 		/* compute size of: ' xml:lang="%s"' */
634 		size += 11 + strlen(elem->lang) + 1;
635 	    }
636 	}
637 
638 	if (elem->ns == APR_XML_NS_NONE) {
639 	    /* compute size of: <%s> */
640 	    size += 1 + strlen(elem->name) + 1;
641 	}
642 	else {
643 	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
644 
645 	    /* compute size of: <ns%d:%s> */
646 	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
647 	}
648 
649 	if (APR_XML_ELEM_IS_EMPTY(elem)) {
650 	    /* insert a closing "/" */
651 	    size += 1;
652 	}
653 	else {
654 	    /*
655 	     * two of above plus "/":
656 	     *     <ns%d:%s> ... </ns%d:%s>
657 	     * OR  <%s> ... </%s>
658 	     */
659 	    size = 2 * size + 1;
660 	}
661 
662 	for (attr = elem->attr; attr; attr = attr->next) {
663 	    if (attr->ns == APR_XML_NS_NONE) {
664 		/* compute size of: ' %s="%s"' */
665 		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
666 	    }
667 	    else {
668 		/* compute size of: ' ns%d:%s="%s"' */
669 		size += 3 + APR_XML_NS_LEN(attr->ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
670 	    }
671 	}
672 
673 	/*
674 	** If the element has an xml:lang value that is *different* from
675 	** its parent, then add the thing in: ' xml:lang="%s"'.
676 	**
677 	** NOTE: we take advantage of the pointer equality established by
678 	** the parsing for "inheriting" the xml:lang values from parents.
679 	*/
680 	if (elem->lang != NULL &&
681 	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
682 	    size += 11 + strlen(elem->lang) + 1;
683 	}
684     }
685     else if (style == APR_XML_X2T_LANG_INNER) {
686 	/*
687 	 * This style prepends the xml:lang value plus a null terminator.
688 	 * If a lang value is not present, then we insert a null term.
689 	 */
690 	size = elem->lang ? strlen(elem->lang) + 1 : 1;
691     }
692     else
693 	size = 0;
694 
695     size += text_size(elem->first_cdata.first);
696 
697     for (elem = elem->first_child; elem; elem = elem->next) {
698 	/* the size of the child element plus the CDATA that follows it */
699 	size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
700 		 text_size(elem->following_cdata.first));
701     }
702 
703     return size;
704 }
705 
write_text(char * s,const apr_text * t)706 static char *write_text(char *s, const apr_text *t)
707 {
708     for (; t; t = t->next) {
709 	apr_size_t len = strlen(t->text);
710 	memcpy(s, t->text, len);
711 	s += len;
712     }
713     return s;
714 }
715 
write_elem(char * s,const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map)716 static char *write_elem(char *s, const apr_xml_elem *elem, int style,
717 			apr_array_header_t *namespaces, int *ns_map)
718 {
719     const apr_xml_elem *child;
720     apr_size_t len;
721     int ns;
722 
723     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
724 	int empty = APR_XML_ELEM_IS_EMPTY(elem);
725 	const apr_xml_attr *attr;
726 
727 	if (elem->ns == APR_XML_NS_NONE) {
728 	    len = sprintf(s, "<%s", elem->name);
729 	}
730 	else {
731 	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
732 	    len = sprintf(s, "<ns%d:%s", ns, elem->name);
733 	}
734 	s += len;
735 
736 	for (attr = elem->attr; attr; attr = attr->next) {
737 	    if (attr->ns == APR_XML_NS_NONE)
738 		len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
739 	    else
740 		len = sprintf(s, " ns%d:%s=\"%s\"", attr->ns, attr->name, attr->value);
741 	    s += len;
742 	}
743 
744 	/* add the xml:lang value if necessary */
745 	if (elem->lang != NULL &&
746 	    (style == APR_XML_X2T_FULL_NS_LANG ||
747 	     elem->parent == NULL ||
748 	     elem->lang != elem->parent->lang)) {
749 	    len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
750 	    s += len;
751 	}
752 
753 	/* add namespace definitions, if required */
754 	if (style == APR_XML_X2T_FULL_NS_LANG) {
755 	    int i;
756 
757 	    for (i = namespaces->nelts; i--;) {
758 		len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
759 			      APR_XML_GET_URI_ITEM(namespaces, i));
760 		s += len;
761 	    }
762 	}
763 
764 	/* no more to do. close it up and go. */
765 	if (empty) {
766 	    *s++ = '/';
767 	    *s++ = '>';
768 	    return s;
769 	}
770 
771 	/* just close it */
772 	*s++ = '>';
773     }
774     else if (style == APR_XML_X2T_LANG_INNER) {
775 	/* prepend the xml:lang value */
776 	if (elem->lang != NULL) {
777 	    len = strlen(elem->lang);
778 	    memcpy(s, elem->lang, len);
779 	    s += len;
780 	}
781 	*s++ = '\0';
782     }
783 
784     s = write_text(s, elem->first_cdata.first);
785 
786     for (child = elem->first_child; child; child = child->next) {
787 	s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map);
788 	s = write_text(s, child->following_cdata.first);
789     }
790 
791     if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
792 	if (elem->ns == APR_XML_NS_NONE) {
793 	    len = sprintf(s, "</%s>", elem->name);
794 	}
795 	else {
796 	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
797 	    len = sprintf(s, "</ns%d:%s>", ns, elem->name);
798 	}
799 	s += len;
800     }
801 
802     return s;
803 }
804 
apr_xml_quote_elem(apr_pool_t * p,apr_xml_elem * elem)805 APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
806 {
807     apr_text *scan_txt;
808     apr_xml_attr *scan_attr;
809     apr_xml_elem *scan_elem;
810 
811     /* convert the element's text */
812     for (scan_txt = elem->first_cdata.first;
813 	 scan_txt != NULL;
814 	 scan_txt = scan_txt->next) {
815 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
816     }
817     for (scan_txt = elem->following_cdata.first;
818 	 scan_txt != NULL;
819 	 scan_txt = scan_txt->next) {
820 	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
821     }
822 
823     /* convert the attribute values */
824     for (scan_attr = elem->attr;
825 	 scan_attr != NULL;
826 	 scan_attr = scan_attr->next) {
827 	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
828     }
829 
830     /* convert the child elements */
831     for (scan_elem = elem->first_child;
832 	 scan_elem != NULL;
833 	 scan_elem = scan_elem->next) {
834 	apr_xml_quote_elem(p, scan_elem);
835     }
836 }
837 
838 /* convert an element to a text string */
apr_xml_to_text(apr_pool_t * p,const apr_xml_elem * elem,int style,apr_array_header_t * namespaces,int * ns_map,const char ** pbuf,apr_size_t * psize)839 APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
840                                   int style, apr_array_header_t *namespaces,
841                                   int *ns_map, const char **pbuf,
842                                   apr_size_t *psize)
843 {
844     /* get the exact size, plus a null terminator */
845     apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
846     char *s = apr_palloc(p, size);
847 
848     (void) write_elem(s, elem, style, namespaces, ns_map);
849     s[size - 1] = '\0';
850 
851     *pbuf = s;
852     if (psize)
853 	*psize = size;
854 }
855 
apr_xml_empty_elem(apr_pool_t * p,const apr_xml_elem * elem)856 APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
857                                              const apr_xml_elem *elem)
858 {
859     if (elem->ns == APR_XML_NS_NONE) {
860 	/*
861 	 * The prefix (xml...) is already within the prop name, or
862 	 * the element simply has no prefix.
863 	 */
864 	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
865     }
866 
867     return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
868 }
869 
870 /* return the URI's (existing) index, or insert it and return a new index */
apr_xml_insert_uri(apr_array_header_t * uri_array,const char * uri)871 APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
872                                     const char *uri)
873 {
874     int i;
875     const char **pelt;
876 
877     /* never insert an empty URI; this index is always APR_XML_NS_NONE */
878     if (*uri == '\0')
879         return APR_XML_NS_NONE;
880 
881     for (i = uri_array->nelts; i--;) {
882 	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
883 	    return i;
884     }
885 
886     pelt = apr_array_push(uri_array);
887     *pelt = uri;		/* assume uri is const or in a pool */
888     return uri_array->nelts - 1;
889 }
890 
891 /* convert the element to EBCDIC */
892 #if APR_CHARSET_EBCDIC
apr_xml_parser_convert_elem(apr_xml_elem * e,apr_xlate_t * convset)893 static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
894                                                 apr_xlate_t *convset)
895 {
896     apr_xml_attr *a;
897     apr_xml_elem *ec;
898     apr_text *t;
899     apr_size_t inbytes_left, outbytes_left;
900     apr_status_t status;
901 
902     inbytes_left = outbytes_left = strlen(e->name);
903     status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
904     if (status) {
905         return status;
906     }
907 
908     for (t = e->first_cdata.first; t != NULL; t = t->next) {
909         inbytes_left = outbytes_left = strlen(t->text);
910         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
911         if (status) {
912             return status;
913         }
914     }
915 
916     for (t = e->following_cdata.first;  t != NULL; t = t->next) {
917         inbytes_left = outbytes_left = strlen(t->text);
918         status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
919         if (status) {
920             return status;
921         }
922     }
923 
924     for (a = e->attr; a != NULL; a = a->next) {
925         inbytes_left = outbytes_left = strlen(a->name);
926         status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
927         if (status) {
928             return status;
929         }
930         inbytes_left = outbytes_left = strlen(a->value);
931         status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
932         if (status) {
933             return status;
934         }
935     }
936 
937     for (ec = e->first_child; ec != NULL; ec = ec->next) {
938         status = apr_xml_parser_convert_elem(ec, convset);
939         if (status) {
940             return status;
941         }
942     }
943     return APR_SUCCESS;
944 }
945 
946 /* convert the whole document to EBCDIC */
apr_xml_parser_convert_doc(apr_pool_t * pool,apr_xml_doc * pdoc,apr_xlate_t * convset)947 APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
948                                                      apr_xml_doc *pdoc,
949                                                      apr_xlate_t *convset)
950 {
951     apr_status_t status;
952     /* Don't convert the namespaces: they are constant! */
953     if (pdoc->namespaces != NULL) {
954         int i;
955         apr_array_header_t *namespaces;
956         namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
957         if (namespaces == NULL)
958             return APR_ENOMEM;
959         for (i = 0; i < pdoc->namespaces->nelts; i++) {
960             apr_size_t inbytes_left, outbytes_left;
961             char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
962             ptr = apr_pstrdup(pool, ptr);
963             if ( ptr == NULL)
964                 return APR_ENOMEM;
965             inbytes_left = outbytes_left = strlen(ptr);
966             status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
967             if (status) {
968                 return status;
969             }
970             apr_xml_insert_uri(namespaces, ptr);
971         }
972         pdoc->namespaces = namespaces;
973     }
974     return apr_xml_parser_convert_elem(pdoc->root, convset);
975 }
976 #endif
977