1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* Copyright (C) 2001 by First Peer, Inc. All rights reserved.
3 ** Copyright (C) 2002 Ximian, Inc.
4 **
5 ** Redistribution and use in source and binary forms, with or without
6 ** modification, are permitted provided that the following conditions
7 ** are met:
8 ** 1. Redistributions of source code must retain the above copyright
9 ** notice, this list of conditions and the following disclaimer.
10 ** 2. Redistributions in binary form must reproduce the above copyright
11 ** notice, this list of conditions and the following disclaimer in the
12 ** documentation and/or other materials provided with the distribution.
13 ** 3. The name of the author may not be used to endorse or promote products
14 ** derived from this software without specific prior written permission.
15 **
16 ** THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 ** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ** ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 ** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 ** OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 ** LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 ** OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 ** SUCH DAMAGE. */
27
28 #include "xmlrpc_config.h"
29
#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* There was code here from 2006-2013 that included <xmlparser.h>
   instead of <libxml/parser.h> when compiling for Windows.  It was probably
   compiled rarely if ever (this file is an optional part of the build).
   In February 2013, a Mingw user found <libxml/parser.h> was necessary,
   and that makes more sense, so we changed it.
*/
40 #include <libxml/parser.h>
41
42 #include "mallocvar.h"
43 #include "xmlrpc-c/base.h"
44 #include "xmlrpc-c/base_int.h"
45 #include "xmlrpc-c/string_int.h"
46
47 #include "xmlparser.h"
48
struct _xml_element {
    xml_element * parentP;
        /* The element this one was appended to as a child.  NULL for a
           freshly created element and for the root of a tree.
        */
    const char * name;
        /* Our own copy of the element name.  xml_element_free() overwrites
           this with XMLRPC_BAD_POINTER as it destroys the element.
        */
    xmlrpc_mem_block * cdataP;    /* char */
        /* Character data collected for this element.  The end-element
           handler appends a terminating NUL.
        */
    xmlrpc_mem_block * childrenP; /* xml_element* */
        /* Array of pointers to child elements; xml_element_free() destroys
           each of them along with this element.
        */
};
55
/* Assert that 'elem' is non-null and does not carry the marker that
   xml_element_free() leaves in the name field (XMLRPC_BAD_POINTER) when it
   destroys an element.
*/
#define XMLRPC_ASSERT_ELEM_OK(elem) \
    XMLRPC_ASSERT((elem) != NULL && (elem)->name != XMLRPC_BAD_POINTER)
58
59
60
61 void
xml_init(xmlrpc_env * const envP)62 xml_init(xmlrpc_env * const envP) {
63
64 XMLRPC_ASSERT_ENV_OK(envP);
65
66 /* N.B. xmlInitParser() does not stack. Calling it twice is the
67 same as calling it once. Consequently, the same is true
68 of xml_init().
69
70 N.B. xmlInitParser() is necessary for form only, because every
71 libxml2 subroutine that needs it to be called just calls it itself.
72 */
73 xmlInitParser();
74 }
75
76
77
void
xml_term(void) {
/*----------------------------------------------------------------------------
   Shut down the XML parser facility, by cleaning up libxml2.

   xmlCleanupParser() keeps no count of xmlInitParser() calls, so a single
   call undoes all of them (and a second call changes nothing more).  The
   caller must therefore be certain that nothing else in the process is
   still using libxml2 when it calls this.
-----------------------------------------------------------------------------*/
    xmlCleanupParser();
}
88
89
90
91 static xml_element *
xmlElementNew(xmlrpc_env * const envP,const char * const name)92 xmlElementNew(xmlrpc_env * const envP,
93 const char * const name) {
94 /*----------------------------------------------------------------------------
95 Create a new xml_element. This routine isn't exported, because the
96 arguments are implementation-dependent.
97 -----------------------------------------------------------------------------*/
98
99 xml_element * retval;
100
101 XMLRPC_ASSERT_ENV_OK(envP);
102 assert(name != NULL);
103
104 MALLOCVAR(retval);
105
106 if (!retval)
107 xmlrpc_faultf(envP, "Couldn't allocate memory for XML element");
108 else {
109 retval->parentP = NULL;
110
111 retval->name = strdup(name);
112
113 if (!retval->name)
114 xmlrpc_faultf(envP, "Couldn't allocate memory for name field "
115 "of XML element");
116 else {
117 retval->cdataP = XMLRPC_MEMBLOCK_NEW(char, envP, 0);
118
119 if (!envP->fault_occurred) {
120 retval->childrenP =
121 XMLRPC_MEMBLOCK_NEW(xml_element, envP, 0);
122 if (!envP->fault_occurred) {
123 if (envP->fault_occurred)
124 xmlrpc_mem_block_free(retval->childrenP);
125 }
126 if (envP->fault_occurred)
127 xmlrpc_mem_block_free(retval->cdataP);
128 }
129 if (envP->fault_occurred)
130 xmlrpc_strfree(retval->name);
131 }
132 if (envP->fault_occurred)
133 free(retval);
134 }
135 return retval;
136 }
137
138
139
140 void
xml_element_free(xml_element * const elemP)141 xml_element_free(xml_element * const elemP) {
142 /*----------------------------------------------------------------------------
143 Blow away an existing element and all of its child elements.
144 -----------------------------------------------------------------------------*/
145 unsigned int size;
146 unsigned int i;
147 xml_element ** contents;
148
149 XMLRPC_ASSERT_ELEM_OK(elemP);
150
151 xmlrpc_strfree(elemP->name);
152 elemP->name = XMLRPC_BAD_POINTER;
153 xmlrpc_mem_block_free(elemP->cdataP);
154
155 /* Deallocate all of our children recursively. */
156 contents = XMLRPC_MEMBLOCK_CONTENTS(xml_element *, elemP->childrenP);
157 size = XMLRPC_MEMBLOCK_SIZE(xml_element *, elemP->childrenP);
158 for (i = 0; i < size; ++i)
159 xml_element_free(contents[i]);
160
161 xmlrpc_mem_block_free(elemP->childrenP);
162
163 free(elemP);
164 }
165
166
167
/*=========================================================================
**  Miscellaneous Accessors
**=========================================================================
**  Return the fields of the xml_element.  See the header for more
**  documentation on how each function works.
*/
174
175 const char *
xml_element_name(const xml_element * const elemP)176 xml_element_name(const xml_element * const elemP) {
177
178 XMLRPC_ASSERT_ELEM_OK(elemP);
179 return elemP->name;
180 }
181
182 size_t
xml_element_cdata_size(const xml_element * const elemP)183 xml_element_cdata_size(const xml_element * const elemP) {
184 /* The result of this function is NOT VALID until the end_element handler
185 has been called!
186 */
187 XMLRPC_ASSERT_ELEM_OK(elemP);
188 return XMLRPC_MEMBLOCK_SIZE(char, elemP->cdataP) - 1;
189 }
190
191
192
193 const char *
xml_element_cdata(const xml_element * const elemP)194 xml_element_cdata(const xml_element * const elemP) {
195 XMLRPC_ASSERT_ELEM_OK(elemP);
196 return XMLRPC_MEMBLOCK_CONTENTS(char, elemP->cdataP);
197 }
198
199
200
201 unsigned int
xml_element_children_size(const xml_element * const elemP)202 xml_element_children_size(const xml_element * const elemP) {
203 XMLRPC_ASSERT_ELEM_OK(elemP);
204 return XMLRPC_MEMBLOCK_SIZE(xml_element *, elemP->childrenP);
205 }
206
207
208
209 xml_element **
xml_element_children(const xml_element * const elemP)210 xml_element_children(const xml_element * const elemP) {
211 XMLRPC_ASSERT_ELEM_OK(elemP);
212 return XMLRPC_MEMBLOCK_CONTENTS(xml_element *, elemP->childrenP);
213 }
214
215
216
217 /*=========================================================================
218 ** Internal xml_element Utility Functions
219 **=========================================================================
220 */
221
222 static void
xmlElementAppendCdata(xmlrpc_env * const envP,xml_element * const elemP,const char * const cdata,size_t const size)223 xmlElementAppendCdata(xmlrpc_env * const envP,
224 xml_element * const elemP,
225 const char * const cdata,
226 size_t const size) {
227
228 XMLRPC_ASSERT_ENV_OK(envP);
229 XMLRPC_ASSERT_ELEM_OK(elemP);
230
231 XMLRPC_MEMBLOCK_APPEND(char, envP, elemP->cdataP, cdata, size);
232 }
233
234
235
236 static void
xmlElementAppendChild(xmlrpc_env * const envP,xml_element * const elemP,xml_element * const childP)237 xmlElementAppendChild(xmlrpc_env * const envP,
238 xml_element * const elemP,
239 xml_element * const childP) {
240
241 /* Whether or not this function succeeds, it takes ownership of the 'child'
242 argument.
243 WARNING - This is the exact opposite of the usual memory ownership
244 rules for xmlrpc_value! So please pay attention.
245 */
246 XMLRPC_ASSERT_ENV_OK(envP);
247 XMLRPC_ASSERT_ELEM_OK(elemP);
248 XMLRPC_ASSERT_ELEM_OK(childP);
249 assert(childP->parentP == NULL);
250
251 XMLRPC_MEMBLOCK_APPEND(xml_element *, envP, elemP->childrenP, &childP, 1);
252 if (!envP->fault_occurred)
253 childP->parentP = elemP;
254 else
255 xml_element_free(childP);
256 }
257
258
259
typedef struct {
/*----------------------------------------------------------------------------
   Our parse context.  We pass this around as libxml user data.

   The event handlers build the element tree in here as the parser reports
   elements and character data.
-----------------------------------------------------------------------------*/

    xmlrpc_env env;
        /* Records a failure that happens inside an event handler.  Once a
           fault is recorded, the handlers do nothing.
        */
    xml_element * rootP;
        /* The outermost element; NULL until the first start-element event */
    xml_element * currentP;
        /* The innermost element that has started but not yet ended.  NULL
           before the root starts and again after the root ends.
        */
} ParseContext;
269
270
271
272 /*=========================================================================
273 ** LibXML Event Handler Functions
274 **=========================================================================
275 */
276
277 static void
startElement_(void * const userData,const xmlChar * const name,const xmlChar ** const attrs ATTR_UNUSED)278 startElement_(void * const userData,
279 const xmlChar * const name,
280 const xmlChar ** const attrs ATTR_UNUSED) {
281
282 ParseContext * contextP;
283 xml_element * elemP;
284 xml_element * newCurrentP;
285
286 assert(userData != NULL && name != NULL);
287
288 /* Get our context and see if an error has already occured. */
289 contextP = (ParseContext*) userData;
290 if (!contextP->env.fault_occurred) {
291 /* Build a new element. */
292 elemP = xmlElementNew(&contextP->env, (char *) name);
293 XMLRPC_FAIL_IF_FAULT(&contextP->env);
294
295 /* Insert it in the appropriate place. */
296 if (!contextP->rootP) {
297 contextP->rootP = elemP;
298 contextP->currentP = elemP;
299 elemP = NULL;
300 } else {
301 assert(contextP->currentP != NULL);
302
303 /* (We need to watch our error handling invariants very carefully
304 ** here. Read the docs for xml_elementAppendChild. */
305 newCurrentP = elemP;
306 xmlElementAppendChild(&contextP->env, contextP->currentP, elemP);
307 elemP = NULL;
308 XMLRPC_FAIL_IF_FAULT(&contextP->env);
309 contextP->currentP = newCurrentP;
310 }
311
312 cleanup:
313 if (elemP)
314 xml_element_free(elemP);
315 }
316 }
317
318
319
320 static void
endElement_(void * const userData,const xmlChar * const name ATTR_UNUSED)321 endElement_(void * const userData,
322 const xmlChar * const name ATTR_UNUSED) {
323
324 ParseContext * contextP;
325
326 assert(userData != NULL && name != NULL);
327
328 /* Get our context and see if an error has already occured. */
329 contextP = (ParseContext*) userData;
330 if (!contextP->env.fault_occurred) {
331 assert(xmlrpc_streq((const char *)name,
332 contextP->currentP->name));
333 assert(contextP->currentP->parentP != NULL ||
334 contextP->currentP == contextP->rootP);
335
336 /* Add a trailing '\0' to our cdata. */
337 xmlElementAppendCdata(&contextP->env, contextP->currentP, "\0", 1);
338 if (!contextP->env.fault_occurred) {
339 /* Pop our "stack" of elements. */
340 contextP->currentP = contextP->currentP->parentP;
341 }
342 }
343 }
344
345
346
347 static void
characterData(void * const userData,const xmlChar * const s,int const len)348 characterData(void * const userData,
349 const xmlChar * const s,
350 int const len) {
351
352 ParseContext * contextP;
353
354 assert(userData != NULL && s != NULL);
355
356 /* Get our context and see if an error has already occured. */
357 contextP = (ParseContext*)userData;
358 if (!contextP->env.fault_occurred) {
359 assert(contextP->currentP != NULL);
360
361 xmlElementAppendCdata(&contextP->env,
362 contextP->currentP,
363 (char *)s,
364 len);
365 }
366 }
367
368
369
370 /*=========================================================================
371 ** LibXML Driver
372 **=========================================================================
373 ** XXX - We should allow the user to specify the encoding of our xml_data.
374 */
375
/* The set of event handlers we give to libxml2.  We handle only the start
   of an element, the end of an element, and character data; every other
   event has a null handler.

   The initializer is positional, so the entries must stay in the order in
   which libxml2 declares the members of xmlSAXHandler.

   NOTE(review): 'initialized' is 1 here; presumably that tells libxml2 to
   use the original (non-namespace) element callbacks rather than the SAX2
   ones below - confirm against the libxml2 documentation.
*/
static xmlSAXHandler const saxHandler = {
    NULL,      /* internalSubset */
    NULL,      /* isStandalone */
    NULL,      /* hasInternalSubset */
    NULL,      /* hasExternalSubset */
    NULL,      /* resolveEntity */
    NULL,      /* getEntity */
    NULL,      /* entityDecl */
    NULL,      /* notationDecl */
    NULL,      /* attributeDecl */
    NULL,      /* elementDecl */
    NULL,      /* unparsedEntityDecl */
    NULL,      /* setDocumentLocator */
    NULL,      /* startDocument */
    NULL,      /* endDocument */
    startElement_,       /* startElement */
    endElement_,         /* endElement */
    NULL,      /* reference */
    characterData,      /* characters */
    NULL,      /* ignorableWhitespace */
    NULL,      /* processingInstruction */
    NULL,      /* comment */
    NULL,      /* warning */
    NULL,      /* error */
    NULL,      /* fatalError */
    NULL,      /* getParameterEntity */
    NULL,      /* cdataBlock */
    NULL,      /* externalSubset */
    1          /* initialized */

    /* Following are SAX2 fields. Any ifdef here? */

    ,NULL,     /* _private */
    NULL,      /* startElementNs */
    NULL,      /* endElementNs */
    NULL       /* serror */
};
413
414
415
416 static void
removeDocSizeLimit(xmlParserCtxt * const parserP ATTR_UNUSED)417 removeDocSizeLimit(xmlParserCtxt * const parserP ATTR_UNUSED) {
418 /*----------------------------------------------------------------------------
419 Set up *parserP to accept a document of any size.
420
421 Newer Libxml2 by default fails any attempt to parse a document larger than
422 10 MiB, because it wants to avoid running the system out of resources.
423 This is not an appropriate role for the library, and 10 MiB is a completely
424 arbitrary number anyway, so we don't use this feature.
425 -----------------------------------------------------------------------------*/
426 #if LIBXML_VERSION > 20700
427
428 xmlCtxtUseOptions(parserP, XML_PARSE_HUGE);
429
430 #else
431
432 /* There's never a size limit */
433
434 #endif
435 }
436
437
438
439 static void
createParser(xmlrpc_env * const envP,ParseContext * const contextP,xmlParserCtxt ** const parserPP)440 createParser(xmlrpc_env * const envP,
441 ParseContext * const contextP,
442 xmlParserCtxt ** const parserPP) {
443 /*----------------------------------------------------------------------------
444 Create an appropriate Libxml2 parser for our purpose.
445 -----------------------------------------------------------------------------*/
446 xmlParserCtxt * parserP;
447
448 parserP = xmlCreatePushParserCtxt((xmlSAXHandler *)&saxHandler, contextP,
449 NULL, 0, NULL);
450
451 if (!parserP)
452 xmlrpc_faultf(envP, "Failed to create libxml2 parser.");
453 else {
454 removeDocSizeLimit(parserP);
455
456 *parserPP = parserP;
457 }
458 }
459
460
461
462 void
xml_parse(xmlrpc_env * const envP,const char * const xmlData,size_t const xmlDataLen,xmlrpc_mem_pool * const memPoolP ATTR_UNUSED,xml_element ** const resultPP)463 xml_parse(xmlrpc_env * const envP,
464 const char * const xmlData,
465 size_t const xmlDataLen,
466 xmlrpc_mem_pool * const memPoolP ATTR_UNUSED,
467 xml_element ** const resultPP) {
468 /*----------------------------------------------------------------------------
469 This is an implementation of the interface declared in xmlparser.h. This
470 implementation uses Libxml2.
471 -----------------------------------------------------------------------------*/
472 ParseContext context;
473 xmlParserCtxt * parserP;
474
475 XMLRPC_ASSERT_ENV_OK(envP);
476 assert(xmlData != NULL);
477
478 xmlrpc_env_init(&context.env);
479 context.rootP = NULL;
480 context.currentP = NULL;
481
482 createParser(envP, &context, &parserP);
483
484 if (!envP->fault_occurred) {
485 int rc;
486
487 rc = xmlParseChunk(parserP, xmlData, xmlDataLen, 1);
488
489 if (rc != 0)
490 xmlrpc_env_set_fault(envP, XMLRPC_PARSE_ERROR,
491 "XML parsing failed");
492 else {
493 if (context.env.fault_occurred) {
494 xmlrpc_env_set_fault(envP, XMLRPC_PARSE_ERROR,
495 context.env.fault_string);
496 /* This should be done by the parser, but I'm not sure which
497 callbacks need to do it.
498 */
499 if (context.rootP)
500 xml_element_free(context.rootP);
501 } else {
502 assert(context.rootP != NULL);
503 assert(context.currentP == NULL);
504
505 *resultPP = context.rootP;
506 }
507 }
508 /* xmlParseChunk() creates a document. You find it with
509 parserP->myDoc.
510 */
511 if (parserP->myDoc)
512 xmlFreeDoc(parserP->myDoc);
513 xmlFreeParserCtxt(parserP);
514 }
515 xmlrpc_env_clean(&context.env);
516 }
517
518
519
520