1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* Copyright (C) 2001 by First Peer, Inc. All rights reserved.
3 ** Copyright (C) 2002 Ximian, Inc.
4 **
5 ** Redistribution and use in source and binary forms, with or without
6 ** modification, are permitted provided that the following conditions
7 ** are met:
8 ** 1. Redistributions of source code must retain the above copyright
9 **    notice, this list of conditions and the following disclaimer.
10 ** 2. Redistributions in binary form must reproduce the above copyright
11 **    notice, this list of conditions and the following disclaimer in the
12 **    documentation and/or other materials provided with the distribution.
13 ** 3. The name of the author may not be used to endorse or promote products
14 **    derived from this software without specific prior written permission.
15 **
16 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 ** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ** ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 ** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 ** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 ** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 ** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 ** SUCH DAMAGE. */
27 
28 #include "xmlrpc_config.h"
29 
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* There was code here from 2006-2013 that included <xmlparser.h>
   instead of <libxml/parser.h> when compiling for Windows.  It was probably
   compiled rarely if ever (this file is an optional part of the build).
   In February 2013, a Mingw user found <libxml/parser.h> was necessary,
   and that makes more sense, so we changed it.
*/
40 #include <libxml/parser.h>
41 
42 #include "mallocvar.h"
43 #include "xmlrpc-c/base.h"
44 #include "xmlrpc-c/base_int.h"
45 #include "xmlrpc-c/string_int.h"
46 
47 #include "xmlparser.h"
48 
/* One node of the element tree that this parser builds. */
struct _xml_element {
    xml_element * parentP;
        /* Element that contains this one.  NULL from creation until the
           element is appended to a parent (xmlElementAppendChild()).
        */
    const char * name;
        /* Element name; a private copy made with strdup().
           xml_element_free() sets this to XMLRPC_BAD_POINTER, which is what
           XMLRPC_ASSERT_ELEM_OK checks for.
        */
    xmlrpc_mem_block * cdataP;    /* char */
        /* Character data appended by the parser callbacks.  The
           end-of-element handler appends a terminating '\0'.
        */
    xmlrpc_mem_block * childrenP; /* xml_element* */
        /* Array of pointers to the child elements, in the order appended */
};
55 
/* Assert that 'elem' is a non-null pointer to an element that has not been
   destroyed.  xml_element_free() marks a destroyed element by setting its
   name to XMLRPC_BAD_POINTER.
*/
#define XMLRPC_ASSERT_ELEM_OK(elem) \
    XMLRPC_ASSERT((elem) != NULL && (elem)->name != XMLRPC_BAD_POINTER)
58 
59 
60 
61 void
xml_init(xmlrpc_env * const envP)62 xml_init(xmlrpc_env * const envP) {
63 
64     XMLRPC_ASSERT_ENV_OK(envP);
65 
66     /* N.B. xmlInitParser() does not stack.  Calling it twice is the
67        same as calling it once.  Consequently, the same is true
68        of xml_init().
69 
70        N.B. xmlInitParser() is necessary for form only, because every
71        libxml2 subroutine that needs it to be called just calls it itself.
72     */
73     xmlInitParser();
74 }
75 
76 
77 
void
xml_term(void) {
/*----------------------------------------------------------------------------
  Terminate use of the underlying libxml2 parser library.

  xmlCleanupParser() does not count calls to xmlInitParser(); calling it
  twice is the same as calling it once.  So you must not call xml_term()
  while anything else in the process is still using libxml2.
-----------------------------------------------------------------------------*/
    xmlCleanupParser();
}
88 
89 
90 
91 static xml_element *
xmlElementNew(xmlrpc_env * const envP,const char * const name)92 xmlElementNew(xmlrpc_env * const envP,
93               const char * const name) {
94 /*----------------------------------------------------------------------------
95   Create a new xml_element. This routine isn't exported, because the
96   arguments are implementation-dependent.
97 -----------------------------------------------------------------------------*/
98 
99     xml_element * retval;
100 
101     XMLRPC_ASSERT_ENV_OK(envP);
102     assert(name != NULL);
103 
104     MALLOCVAR(retval);
105 
106     if (!retval)
107         xmlrpc_faultf(envP, "Couldn't allocate memory for XML element");
108     else {
109         retval->parentP = NULL;
110 
111         retval->name = strdup(name);
112 
113         if (!retval->name)
114             xmlrpc_faultf(envP, "Couldn't allocate memory for name field "
115                           "of XML element");
116         else {
117             retval->cdataP = XMLRPC_MEMBLOCK_NEW(char, envP, 0);
118 
119             if (!envP->fault_occurred) {
120                 retval->childrenP =
121                     XMLRPC_MEMBLOCK_NEW(xml_element, envP, 0);
122                 if (!envP->fault_occurred) {
123                     if (envP->fault_occurred)
124                         xmlrpc_mem_block_free(retval->childrenP);
125                 }
126                 if (envP->fault_occurred)
127                     xmlrpc_mem_block_free(retval->cdataP);
128             }
129             if (envP->fault_occurred)
130                 xmlrpc_strfree(retval->name);
131         }
132         if (envP->fault_occurred)
133             free(retval);
134     }
135     return retval;
136 }
137 
138 
139 
140 void
xml_element_free(xml_element * const elemP)141 xml_element_free(xml_element * const elemP) {
142 /*----------------------------------------------------------------------------
143   Blow away an existing element and all of its child elements.
144 -----------------------------------------------------------------------------*/
145     unsigned int size;
146     unsigned int i;
147     xml_element ** contents;
148 
149     XMLRPC_ASSERT_ELEM_OK(elemP);
150 
151     xmlrpc_strfree(elemP->name);
152     elemP->name = XMLRPC_BAD_POINTER;
153     xmlrpc_mem_block_free(elemP->cdataP);
154 
155     /* Deallocate all of our children recursively. */
156     contents = XMLRPC_MEMBLOCK_CONTENTS(xml_element *, elemP->childrenP);
157     size = XMLRPC_MEMBLOCK_SIZE(xml_element *, elemP->childrenP);
158     for (i = 0; i < size; ++i)
159         xml_element_free(contents[i]);
160 
161     xmlrpc_mem_block_free(elemP->childrenP);
162 
163     free(elemP);
164 }
165 
166 
167 
/*=========================================================================
**  Miscellaneous Accessors
**=========================================================================
**  Return the fields of the xml_element. See the header for more
**  documentation on how each function works.
*/
174 
175 const char *
xml_element_name(const xml_element * const elemP)176 xml_element_name(const xml_element * const elemP) {
177 
178     XMLRPC_ASSERT_ELEM_OK(elemP);
179     return elemP->name;
180 }
181 
182 size_t
xml_element_cdata_size(const xml_element * const elemP)183 xml_element_cdata_size(const xml_element * const elemP) {
184     /* The result of this function is NOT VALID until the end_element handler
185        has been called!
186     */
187     XMLRPC_ASSERT_ELEM_OK(elemP);
188     return XMLRPC_MEMBLOCK_SIZE(char, elemP->cdataP) - 1;
189 }
190 
191 
192 
193 const char *
xml_element_cdata(const xml_element * const elemP)194 xml_element_cdata(const xml_element * const elemP) {
195     XMLRPC_ASSERT_ELEM_OK(elemP);
196     return XMLRPC_MEMBLOCK_CONTENTS(char, elemP->cdataP);
197 }
198 
199 
200 
201 unsigned int
xml_element_children_size(const xml_element * const elemP)202 xml_element_children_size(const xml_element * const elemP) {
203     XMLRPC_ASSERT_ELEM_OK(elemP);
204     return XMLRPC_MEMBLOCK_SIZE(xml_element *, elemP->childrenP);
205 }
206 
207 
208 
209 xml_element **
xml_element_children(const xml_element * const elemP)210 xml_element_children(const xml_element * const elemP) {
211     XMLRPC_ASSERT_ELEM_OK(elemP);
212     return XMLRPC_MEMBLOCK_CONTENTS(xml_element *, elemP->childrenP);
213 }
214 
215 
216 
217 /*=========================================================================
218 **  Internal xml_element Utility Functions
219 **=========================================================================
220 */
221 
222 static void
xmlElementAppendCdata(xmlrpc_env * const envP,xml_element * const elemP,const char * const cdata,size_t const size)223 xmlElementAppendCdata(xmlrpc_env *  const envP,
224 				      xml_element * const elemP,
225 				      const char *  const cdata,
226 				      size_t        const size) {
227 
228     XMLRPC_ASSERT_ENV_OK(envP);
229     XMLRPC_ASSERT_ELEM_OK(elemP);
230 
231     XMLRPC_MEMBLOCK_APPEND(char, envP, elemP->cdataP, cdata, size);
232 }
233 
234 
235 
236 static void
xmlElementAppendChild(xmlrpc_env * const envP,xml_element * const elemP,xml_element * const childP)237 xmlElementAppendChild(xmlrpc_env *  const envP,
238 				      xml_element * const elemP,
239 				      xml_element * const childP) {
240 
241     /* Whether or not this function succeeds, it takes ownership of the 'child'
242        argument.
243        WARNING - This is the exact opposite of the usual memory ownership
244        rules for xmlrpc_value! So please pay attention.
245     */
246     XMLRPC_ASSERT_ENV_OK(envP);
247     XMLRPC_ASSERT_ELEM_OK(elemP);
248     XMLRPC_ASSERT_ELEM_OK(childP);
249     assert(childP->parentP == NULL);
250 
251     XMLRPC_MEMBLOCK_APPEND(xml_element *, envP, elemP->childrenP, &childP, 1);
252     if (!envP->fault_occurred)
253         childP->parentP = elemP;
254     else
255         xml_element_free(childP);
256 }
257 
258 
259 
typedef struct {
/*----------------------------------------------------------------------------
   Our parse context. We pass this around as libxml user data.
-----------------------------------------------------------------------------*/

    xmlrpc_env env;
        /* Error state of the parse.  The callbacks record failures here and
           do nothing once a fault has occurred.
        */
    xml_element * rootP;
        /* Root of the element tree built so far; NULL until the first
           start-of-element callback.
        */
    xml_element * currentP;
        /* Element that is currently open.  A start-of-element callback sets
           it to the new element; an end-of-element callback sets it to the
           parent of the element being closed.
        */
} ParseContext;
269 
270 
271 
272 /*=========================================================================
273 **  LibXML Event Handler Functions
274 **=========================================================================
275 */
276 
277 static void
startElement_(void * const userData,const xmlChar * const name,const xmlChar ** const attrs ATTR_UNUSED)278 startElement_(void *           const userData,
279               const xmlChar *  const name,
280               const xmlChar ** const attrs ATTR_UNUSED) {
281 
282     ParseContext * contextP;
283     xml_element  * elemP;
284     xml_element  * newCurrentP;
285 
286     assert(userData != NULL && name != NULL);
287 
288     /* Get our context and see if an error has already occured. */
289     contextP = (ParseContext*) userData;
290     if (!contextP->env.fault_occurred) {
291         /* Build a new element. */
292         elemP = xmlElementNew(&contextP->env, (char *) name);
293         XMLRPC_FAIL_IF_FAULT(&contextP->env);
294 
295         /* Insert it in the appropriate place. */
296         if (!contextP->rootP) {
297             contextP->rootP = elemP;
298             contextP->currentP = elemP;
299             elemP = NULL;
300         } else {
301             assert(contextP->currentP != NULL);
302 
303             /* (We need to watch our error handling invariants very carefully
304             ** here. Read the docs for xml_elementAppendChild. */
305             newCurrentP = elemP;
306             xmlElementAppendChild(&contextP->env, contextP->currentP, elemP);
307             elemP = NULL;
308             XMLRPC_FAIL_IF_FAULT(&contextP->env);
309             contextP->currentP = newCurrentP;
310         }
311 
312     cleanup:
313         if (elemP)
314             xml_element_free(elemP);
315     }
316 }
317 
318 
319 
320 static void
endElement_(void * const userData,const xmlChar * const name ATTR_UNUSED)321 endElement_(void *          const userData,
322             const xmlChar * const name ATTR_UNUSED) {
323 
324     ParseContext * contextP;
325 
326     assert(userData != NULL && name != NULL);
327 
328     /* Get our context and see if an error has already occured. */
329     contextP = (ParseContext*) userData;
330     if (!contextP->env.fault_occurred) {
331         assert(xmlrpc_streq((const char *)name,
332                             contextP->currentP->name));
333         assert(contextP->currentP->parentP != NULL ||
334                contextP->currentP == contextP->rootP);
335 
336         /* Add a trailing '\0' to our cdata. */
337         xmlElementAppendCdata(&contextP->env, contextP->currentP, "\0", 1);
338         if (!contextP->env.fault_occurred) {
339             /* Pop our "stack" of elements. */
340             contextP->currentP = contextP->currentP->parentP;
341         }
342     }
343 }
344 
345 
346 
347 static void
characterData(void * const userData,const xmlChar * const s,int const len)348 characterData(void *          const userData,
349               const xmlChar * const s,
350               int             const len) {
351 
352     ParseContext * contextP;
353 
354     assert(userData != NULL && s != NULL);
355 
356     /* Get our context and see if an error has already occured. */
357     contextP = (ParseContext*)userData;
358     if (!contextP->env.fault_occurred) {
359         assert(contextP->currentP != NULL);
360 
361         xmlElementAppendCdata(&contextP->env,
362                               contextP->currentP,
363                               (char *)s,
364                               len);
365     }
366 }
367 
368 
369 
370 /*=========================================================================
371 **  LibXML Driver
372 **=========================================================================
373 **  XXX - We should allow the user to specify the encoding of our xml_data.
374 */
375 
/* The set of SAX callbacks we give to libxml2.  We handle only start of
   element, end of element, and character data; every other event has a NULL
   handler.

   The initializers are positional, so their order must match the order of
   the members of xmlSAXHandler in the libxml2 headers.
*/
static xmlSAXHandler const saxHandler = {
    NULL,      /* internalSubset */
    NULL,      /* isStandalone */
    NULL,      /* hasInternalSubset */
    NULL,      /* hasExternalSubset */
    NULL,      /* resolveEntity */
    NULL,      /* getEntity */
    NULL,      /* entityDecl */
    NULL,      /* notationDecl */
    NULL,      /* attributeDecl */
    NULL,      /* elementDecl */
    NULL,      /* unparsedEntityDecl */
    NULL,      /* setDocumentLocator */
    NULL,      /* startDocument */
    NULL,      /* endDocument */
    startElement_,       /* startElement */
    endElement_,         /* endElement */
    NULL,      /* reference */
    characterData,      /* characters */
    NULL,      /* ignorableWhitespace */
    NULL,      /* processingInstruction */
    NULL,      /* comment */
    NULL,      /* warning */
    NULL,      /* error */
    NULL,      /* fatalError */
    NULL,      /* getParameterEntity */
    NULL,      /* cdataBlock */
    NULL,      /* externalSubset */
    1          /* initialized */

    /* Following are SAX2 fields. Any ifdef here? */

    ,NULL,     /* _private */
    NULL,      /* startElementNs */
    NULL,      /* endElementNs */
    NULL       /* serror */
};
413 
414 
415 
416 static void
removeDocSizeLimit(xmlParserCtxt * const parserP ATTR_UNUSED)417 removeDocSizeLimit(xmlParserCtxt * const parserP ATTR_UNUSED) {
418 /*----------------------------------------------------------------------------
419    Set up *parserP to accept a document of any size.
420 
421    Newer Libxml2 by default fails any attempt to parse a document larger than
422    10 MiB, because it wants to avoid running the system out of resources.
423    This is not an appropriate role for the library, and 10 MiB is a completely
424    arbitrary number anyway, so we don't use this feature.
425 -----------------------------------------------------------------------------*/
426 #if LIBXML_VERSION > 20700
427 
428     xmlCtxtUseOptions(parserP, XML_PARSE_HUGE);
429 
430 #else
431 
432     /* There's never a size limit */
433 
434 #endif
435 }
436 
437 
438 
439 static void
createParser(xmlrpc_env * const envP,ParseContext * const contextP,xmlParserCtxt ** const parserPP)440 createParser(xmlrpc_env *     const envP,
441              ParseContext *   const contextP,
442              xmlParserCtxt ** const parserPP) {
443 /*----------------------------------------------------------------------------
444    Create an appropriate Libxml2 parser for our purpose.
445 -----------------------------------------------------------------------------*/
446     xmlParserCtxt * parserP;
447 
448     parserP = xmlCreatePushParserCtxt((xmlSAXHandler *)&saxHandler, contextP,
449                                         NULL, 0, NULL);
450 
451     if (!parserP)
452         xmlrpc_faultf(envP, "Failed to create libxml2 parser.");
453     else {
454         removeDocSizeLimit(parserP);
455 
456         *parserPP = parserP;
457     }
458 }
459 
460 
461 
462 void
xml_parse(xmlrpc_env * const envP,const char * const xmlData,size_t const xmlDataLen,xmlrpc_mem_pool * const memPoolP ATTR_UNUSED,xml_element ** const resultPP)463 xml_parse(xmlrpc_env *      const envP,
464           const char *      const xmlData,
465           size_t            const xmlDataLen,
466           xmlrpc_mem_pool * const memPoolP ATTR_UNUSED,
467           xml_element **    const resultPP) {
468 /*----------------------------------------------------------------------------
469   This is an implementation of the interface declared in xmlparser.h.  This
470   implementation uses Libxml2.
471 -----------------------------------------------------------------------------*/
472     ParseContext context;
473     xmlParserCtxt * parserP;
474 
475     XMLRPC_ASSERT_ENV_OK(envP);
476     assert(xmlData != NULL);
477 
478     xmlrpc_env_init(&context.env);
479     context.rootP    = NULL;
480     context.currentP = NULL;
481 
482     createParser(envP, &context, &parserP);
483 
484     if (!envP->fault_occurred) {
485         int rc;
486 
487         rc = xmlParseChunk(parserP, xmlData, xmlDataLen, 1);
488 
489         if (rc != 0)
490             xmlrpc_env_set_fault(envP, XMLRPC_PARSE_ERROR,
491                                  "XML parsing failed");
492         else {
493             if (context.env.fault_occurred) {
494                 xmlrpc_env_set_fault(envP, XMLRPC_PARSE_ERROR,
495                                      context.env.fault_string);
496                 /* This should be done by the parser, but I'm not sure which
497                    callbacks need to do it.
498                 */
499                 if (context.rootP)
500                     xml_element_free(context.rootP);
501             } else {
502                 assert(context.rootP != NULL);
503                 assert(context.currentP == NULL);
504 
505                 *resultPP = context.rootP;
506             }
507         }
508         /* xmlParseChunk() creates a document.  You find it with
509            parserP->myDoc.
510         */
511         if (parserP->myDoc)
512             xmlFreeDoc(parserP->myDoc);
513         xmlFreeParserCtxt(parserP);
514     }
515     xmlrpc_env_clean(&context.env);
516 }
517 
518 
519 
520