1 /*
2    Higher Level Interface to XML Parsers.
3    Copyright (C) 1999-2004, Joe Orton <joe@manyfish.co.uk>
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with this library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18    MA 02111-1307, USA
19 
20 */
21 
22 #include "config.h"
23 
24 #ifdef HAVE_STDLIB_H
25 #include <stdlib.h>
26 #endif
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30 #ifdef HAVE_STRINGS_H
31 #include <strings.h>
32 #endif
33 
34 #include "ne_i18n.h"
35 
36 #include "ne_alloc.h"
37 #include "ne_xml.h"
38 #include "ne_utils.h"
39 #include "ne_string.h"
40 
41 #if defined(HAVE_EXPAT)
42 /* expat support: */
43 #ifdef HAVE_XMLPARSE_H
44 #include "xmlparse.h"
45 #else
46 #include <expat.h>
47 #endif
48 typedef XML_Char ne_xml_char;
49 #elif defined(HAVE_LIBXML)
50 /* libxml2 support: */
51 #include <libxml/xmlversion.h>
52 #include <libxml/parser.h>
53 typedef xmlChar ne_xml_char;
54 
55 #else /* not HAVE_LIBXML */
56 #  error need an XML parser
57 #endif /* not HAVE_EXPAT */
58 
/* Size in bytes of the parser's error message buffer (the 'error'
 * array in struct ne_xml_parser_s).  Approx. one screen of text: */
#define ERR_SIZE (2048)
61 
/* One registered set of callbacks.  Handlers are linked through
 * 'next'; start_element() walks that chain, offering a new element to
 * each handler in turn until one of them does not decline it. */
struct handler {
    ne_xml_startelm_cb *startelm_cb; /* start-element callback */
    ne_xml_endelm_cb *endelm_cb; /* end-element callback; may be NULL */
    ne_xml_cdata_cb *cdata_cb; /* character-data callback; may be NULL */
    void *userdata; /* userdata passed to each of the above. */
    struct handler *next; /* next handler in stack, or NULL. */
};
69 
#ifdef HAVE_LIBXML
/* Error callback referenced by the libxml SAX handler table; declared
 * here because its definition comes later in this file. */
static void sax_error(void *ctx, const char *msg, ...);
#endif
73 
/* One element on the current branch of the document tree. */
struct element {
    /* Namespace URI of the element.  Not owned through this field:
     * expand_qname() points it at a default_ns or namespace URI held
     * by this element or one of its ancestors. */
    const ne_xml_char *nspace;
    /* Local part of the element name; an allocated copy which is
     * released by destroy_element(). */
    ne_xml_char *name;

    int state; /* opaque state integer; set by start_element() to the
                * positive value returned by the accepting handler */

    /* Namespaces declared in this element */
    ne_xml_char *default_ns; /* A default namespace, or NULL if this
                              * element does not declare one */
    struct namespace *nspaces; /* List of other namespace scopes */

    struct handler *handler; /* Handler for this element */

    struct element *parent; /* parent element, or NULL */
};
88 
/* We pass around a ne_xml_parser as the userdata in the parsing
 * library.  This maintains the current state of the parse and various
 * other bits and bobs.  Within the parse, we store the current branch
 * of the tree, i.e., the current element and all its parents, up to
 * the root, but nothing other than that.  */
struct ne_xml_parser_s {
    struct element *root; /* the root of the document: a placeholder
                           * element allocated by ne_xml_create() */
    struct element *current; /* current element in the branch */
    struct handler *top_handlers; /* always points at the
                                   * handler on top of the stack, i.e.
                                   * the most recently pushed one;
                                   * NULL until a handler is pushed. */
    int valid; /* non-zero whilst parse should continue */
    int prune; /* if non-zero, depth within a dead branch */

#ifdef HAVE_EXPAT
    XML_Parser parser; /* the expat parser object */
    char *encoding; /* encoding named in the XML declaration (set by
                     * decl_handler), or NULL; freed by ne_xml_destroy() */
#else
    xmlParserCtxtPtr parser; /* the libxml push-parser context */
#endif
    char error[ERR_SIZE]; /* message describing the most recent error */
};
110 
/* The callback handlers.  These are declared up front because they
 * are referenced before their definitions appear in this file. */
static void start_element(void *userdata, const ne_xml_char *name, const ne_xml_char **atts);
static void end_element(void *userdata, const ne_xml_char *name);
static void char_data(void *userdata, const ne_xml_char *cdata, int len);
/* Looks up the namespace URI bound to a prefix of length 'pfxlen',
 * searching 'elm' and then its ancestors; NULL if it is not declared. */
static const char *resolve_nspace(const struct element *elm,
                                  const char *prefix, size_t pfxlen);
117 
/* Linked list of namespace scopes: one node per "xmlns:PREFIX"
 * declaration found on an element.  Both strings are allocated copies
 * which destroy_element() frees along with the node. */
struct namespace {
    ne_xml_char *name; /* the declared prefix (the text after "xmlns:") */
    ne_xml_char *uri; /* the namespace URI bound to that prefix */
    struct namespace *next; /* next declaration on the same element, or NULL */
};
124 
125 #ifdef HAVE_LIBXML
126 
/* SAX callback table handed to libxml by ne_xml_create().  The
 * initialiser is positional, so the order of the entries must follow
 * the member order of libxml's xmlSAXHandler; only element start/end,
 * character data (plain and CDATA blocks) and the error callbacks are
 * hooked, every other slot is left NULL.
 *
 * Could be const as far as we care, but libxml doesn't want that */
static xmlSAXHandler sax_handler = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    start_element, /* startElement */
    end_element, /* endElement */
    NULL, /* reference */
    char_data, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* xmlParserWarning */
    sax_error, /* xmlParserError */
    sax_error, /* fatal error (never called by libxml2?) */
    NULL, /* getParameterEntity */
    char_data /* cdataBlock */
};
156 
/* Empty, NULL-terminated attributes array, used to mimic expat
 * behaviour: handlers always receive a non-NULL attribute array,
 * even when libxml supplies none for an element. */
static const char *empty_atts[] = {NULL, NULL};

/* Macro for determining the attributes array to pass to a handler:
 * 'atts' itself when it is non-NULL, otherwise the empty array above.
 * The argument is parenthesised everywhere it is used so that any
 * expression may be passed safely; note it is evaluated twice. */
#define PASS_ATTS(atts) ((atts) ? (const char **)(atts) : empty_atts)
162 
163 #else
164 
/* With expat the attributes array is passed through to handlers
 * unchanged, apart from the cast to 'const char **'. */
#define PASS_ATTS(atts) ((const char **)(atts))
166 
167 /* XML declaration callback for expat. */
decl_handler(void * userdata,const XML_Char * version,const XML_Char * encoding,int standalone)168 static void decl_handler(void *userdata,
169 			 const XML_Char *version, const XML_Char *encoding,
170 			 int standalone)
171 {
172     ne_xml_parser *p = userdata;
173     if (encoding) p->encoding = ne_strdup(encoding);
174 }
175 
176 #endif /* HAVE_LIBXML */
177 
/* Returns the line number which the underlying XML parser reports as
 * its current position in the input. */
int ne_xml_currentline(ne_xml_parser *p)
{
    int line;

#ifdef HAVE_EXPAT
    line = XML_GetCurrentLineNumber(p->parser);
#else
    line = p->parser->input->line;
#endif

    return line;
}
186 
ne_xml_doc_encoding(const ne_xml_parser * p)187 const char *ne_xml_doc_encoding(const ne_xml_parser *p)
188 {
189 #ifdef HAVE_LIBXML
190     return p->parser->encoding;
191 #else
192     return p->encoding;
193 #endif
194 }
195 
196 /* Extract the namespace prefix declarations from 'atts'. */
declare_nspaces(ne_xml_parser * p,struct element * elm,const ne_xml_char ** atts)197 static int declare_nspaces(ne_xml_parser *p, struct element *elm,
198                            const ne_xml_char **atts)
199 {
200     int n;
201 
202     for (n = 0; atts && atts[n]; n += 2) {
203         if (strcasecmp(atts[n], "xmlns") == 0) {
204             /* New default namespace */
205             elm->default_ns = ne_strdup(atts[n+1]);
206         } else if (strncasecmp(atts[n], "xmlns:", 6) == 0) {
207             struct namespace *ns;
208 
209             if (atts[n][6] == '\0' || atts[n+1][0] == '\0') {
210                 ne_snprintf(p->error, ERR_SIZE,
211                             ("XML parse error at line %d: invalid namespace "
212                              "declaration"), ne_xml_currentline(p));
213                 return -1;
214             }
215 
216             /* New namespace scope */
217             ns = ne_calloc(sizeof(*ns));
218             ns->next = elm->nspaces;
219             elm->nspaces = ns;
220             ns->name = ne_strdup(atts[n]+6); /* skip the xmlns= */
221             ns->uri = ne_strdup(atts[n+1]);
222         }
223     }
224 
225     return 0;
226 }
227 
228 /* Expand an XML qualified name, which may include a namespace prefix
229  * as well as the local part. */
expand_qname(ne_xml_parser * p,struct element * elm,const ne_xml_char * qname)230 static int expand_qname(ne_xml_parser *p, struct element *elm,
231                         const ne_xml_char *qname)
232 {
233     const ne_xml_char *pfx;
234 
235     pfx = strchr(qname, ':');
236     if (pfx == NULL) {
237         struct element *e = elm;
238 
239         /* Find default namespace; guaranteed to terminate as the root
240          * element always has default_ns="". */
241         while (e->default_ns == NULL)
242             e = e->parent;
243 
244         elm->name = ne_strdup(qname);
245         elm->nspace = e->default_ns;
246     } else {
247         const char *uri = resolve_nspace(elm, qname, pfx-qname);
248 
249 	if (uri) {
250 	    /* The name is everything after the ':' */
251 	    if (pfx[1] == '\0') {
252 		ne_snprintf(p->error, ERR_SIZE,
253 			    ("XML parse error at line %d: element name missing"
254                              "after namespace prefix"), ne_xml_currentline(p));
255 		return -1;
256 	    }
257 	    elm->name = ne_strdup(pfx+1);
258             elm->nspace = uri;
259 	} else {
260 	    ne_snprintf(p->error, ERR_SIZE,
261                         ("XML parse error at line %d: undeclared namespace"),
262                         ne_xml_currentline(p));
263 	    return -1;
264 	}
265     }
266     return 0;
267 }
268 
269 /* Called with the start of a new element. */
start_element(void * userdata,const ne_xml_char * name,const ne_xml_char ** atts)270 static void start_element(void *userdata, const ne_xml_char *name,
271 			  const ne_xml_char **atts)
272 {
273     ne_xml_parser *p = userdata;
274     struct element *elm;
275     struct handler *hand;
276     int state = NE_XML_DECLINE;
277 
278     if (!p->valid) return;
279 
280     if (p->prune) {
281         p->prune++;
282         return;
283     }
284 
285     /* Create a new element */
286     elm = ne_calloc(sizeof *elm);
287     elm->parent = p->current;
288     p->current = elm;
289 
290     if (declare_nspaces(p, elm, atts) || expand_qname(p, elm, name)) {
291         p->valid = 0;
292         return;
293     }
294 
295     /* Find a handler which will accept this element (or abort the parse) */
296     for (hand = elm->parent->handler; hand && state == NE_XML_DECLINE;
297          hand = hand->next) {
298         elm->handler = hand;
299         state = hand->startelm_cb(hand->userdata, elm->parent->state,
300                                   elm->nspace, elm->name, PASS_ATTS(atts));
301     }
302 
303     NE_DEBUG(NE_DBG_XMLPARSE, "XML: start-element (%d, {%s, %s}) => %d\n",
304              elm->parent->state, elm->nspace, elm->name, state);
305 
306     if (state > 0)
307         elm->state = state;
308     else if (state == NE_XML_DECLINE)
309         /* prune this branch. */
310         p->prune++;
311     else /* state == NE_XML_ABORT */
312         p->valid = 0;
313 }
314 
315 /* Destroys an element structure. */
destroy_element(struct element * elm)316 static void destroy_element(struct element *elm)
317 {
318     struct namespace *this_ns, *next_ns;
319     ne_free(elm->name);
320     /* Free the namespaces */
321     this_ns = elm->nspaces;
322     while (this_ns != NULL) {
323 	next_ns = this_ns->next;
324 	ne_free(this_ns->name);
325 	ne_free(this_ns->uri);
326 	ne_free(this_ns);
327 	this_ns = next_ns;
328     };
329     if (elm->default_ns)
330         ne_free(elm->default_ns);
331     ne_free(elm);
332 }
333 
334 /* cdata SAX callback */
char_data(void * userdata,const ne_xml_char * data,int len)335 static void char_data(void *userdata, const ne_xml_char *data, int len)
336 {
337     ne_xml_parser *p = userdata;
338     struct element *elm = p->current;
339 
340     if (!p->valid || p->prune) return;
341 
342     if (elm->handler->cdata_cb &&
343         elm->handler->cdata_cb(elm->handler->userdata, elm->state, data, len)) {
344         NE_DEBUG(NE_DBG_XML, "Cdata callback failed.\n");
345         p->valid = 0;
346     }
347 }
348 
349 /* Called with the end of an element */
end_element(void * userdata,const ne_xml_char * name)350 static void end_element(void *userdata, const ne_xml_char *name)
351 {
352     ne_xml_parser *p = userdata;
353     struct element *elm = p->current;
354 
355     if (!p->valid) return;
356 
357     if (p->prune) {
358         if (p->prune-- > 1) return;
359     } else if (elm->handler->endelm_cb &&
360                elm->handler->endelm_cb(elm->handler->userdata, elm->state,
361                                        elm->nspace, elm->name)) {
362         NE_DEBUG(NE_DBG_XML, "XML: end-element for %d failed.\n", elm->state);
363         p->valid = 0;
364     }
365 
366     NE_DEBUG(NE_DBG_XMLPARSE, "XML: end-element (%d, {%s, %s})\n",
367              elm->state, elm->nspace, elm->name);
368 
369     /* move back up the tree */
370     p->current = elm->parent;
371     p->prune = 0;
372 
373     destroy_element(elm);
374 }
375 
376 /* Find a namespace definition for 'prefix' in given element, where
377  * length of prefix is 'pfxlen'.  Returns the URI or NULL. */
resolve_nspace(const struct element * elm,const char * prefix,size_t pfxlen)378 static const char *resolve_nspace(const struct element *elm,
379                                   const char *prefix, size_t pfxlen)
380 {
381     const struct element *s;
382 
383     /* Search up the tree. */
384     for (s = elm; s != NULL; s = s->parent) {
385 	const struct namespace *ns;
386 	/* Iterate over defined spaces on this node. */
387 	for (ns = s->nspaces; ns != NULL; ns = ns->next) {
388 	    if (strlen(ns->name) == pfxlen &&
389 		memcmp(ns->name, prefix, pfxlen) == 0)
390 		return ns->uri;
391 	}
392     }
393 
394     return NULL;
395 }
396 
ne_xml_create(void)397 ne_xml_parser *ne_xml_create(void)
398 {
399     ne_xml_parser *p = ne_calloc(sizeof *p);
400     /* Initialize other stuff */
401     p->valid = 1;
402     /* Placeholder for the root element */
403     p->current = p->root = ne_calloc(sizeof *p->root);
404     p->root->default_ns = "";
405     p->root->state = 0;
406     strcpy(p->error, _("Unknown error"));
407 #ifdef HAVE_EXPAT
408     p->parser = XML_ParserCreate(NULL);
409     if (p->parser == NULL) {
410 	abort();
411     }
412     XML_SetElementHandler(p->parser, start_element, end_element);
413     XML_SetCharacterDataHandler(p->parser, char_data);
414     XML_SetUserData(p->parser, (void *) p);
415     XML_SetXmlDeclHandler(p->parser, decl_handler);
416 #else
417     p->parser = xmlCreatePushParserCtxt(&sax_handler,
418 					(void *)p, NULL, 0, NULL);
419     if (p->parser == NULL) {
420 	abort();
421     }
422     p->parser->replaceEntities = 1;
423 #endif
424     return p;
425 }
426 
/* Registers a new set of callbacks with the parser.  The new handler
 * is linked after the most recently pushed one; the first handler
 * ever pushed also becomes the handler of the root element.
 * 'userdata' is stored and passed back to each callback. */
void ne_xml_push_handler(ne_xml_parser *p,
                         ne_xml_startelm_cb *startelm_cb,
                         ne_xml_cdata_cb *cdata_cb,
                         ne_xml_endelm_cb *endelm_cb,
                         void *userdata)
{
    struct handler *fresh = ne_calloc(sizeof(struct handler));

    fresh->userdata = userdata;
    fresh->startelm_cb = startelm_cb;
    fresh->endelm_cb = endelm_cb;
    fresh->cdata_cb = cdata_cb;

    if (p->top_handlers != NULL) {
        /* Chain on behind the previous top of the stack. */
        p->top_handlers->next = fresh;
    } else {
        /* First handler registered: it is the base of the stack too. */
        p->root->handler = fresh;
    }

    p->top_handlers = fresh;
}
450 
ne_xml_parse_v(void * userdata,const char * block,size_t len)451 void ne_xml_parse_v(void *userdata, const char *block, size_t len)
452 {
453     ne_xml_parser *p = userdata;
454     /* FIXME: The two XML parsers break all our nice abstraction by
455      * choosing different char *'s. The swine. This cast will come
456      * back and bite us someday, no doubt. */
457     ne_xml_parse(p, block, len);
458 }
459 
460 /* Parse the given block of input of length len */
ne_xml_parse(ne_xml_parser * p,const char * block,size_t len)461 void ne_xml_parse(ne_xml_parser *p, const char *block, size_t len)
462 {
463     int ret, flag;
464     /* duck out if it's broken */
465     if (!p->valid) {
466 	NE_DEBUG(NE_DBG_XML, "Not parsing %" NE_FMT_SIZE_T " bytes.\n",
467 		 len);
468 	return;
469     }
470     if (len == 0) {
471 	flag = -1;
472 	block = "";
473 	NE_DEBUG(NE_DBG_XML, "Got 0-length buffer, end of document.\n");
474     } else {
475 	NE_DEBUG(NE_DBG_XML, "Parsing %" NE_FMT_SIZE_T " length buffer.\n",
476 		 len);
477 	flag = 0;
478     }
479     /* Note, don't write a parser error if !p->valid, since an error
480      * will already have been written in that case. */
481 #ifdef HAVE_EXPAT
482     ret = XML_Parse(p->parser, block, len, flag);
483     NE_DEBUG(NE_DBG_XMLPARSE, "XML_Parse returned %d\n", ret);
484     if (ret == 0 && p->valid) {
485 	ne_snprintf(p->error, ERR_SIZE,
486 		    "XML parse error at line %d: %s",
487 		    XML_GetCurrentLineNumber(p->parser),
488 		    XML_ErrorString(XML_GetErrorCode(p->parser)));
489 	p->valid = 0;
490     }
491 #else
492     ret = xmlParseChunk(p->parser, block, len, flag);
493     NE_DEBUG(NE_DBG_XMLPARSE, "xmlParseChunk returned %d\n", ret);
494     /* Parse errors are normally caught by the sax_error() callback,
495      * which clears p->valid. */
496     if (p->parser->errNo && p->valid) {
497 	ne_snprintf(p->error, ERR_SIZE, "XML parse error at line %d.",
498 		    ne_xml_currentline(p));
499 	p->valid = 0;
500     }
501 #endif
502 }
503 
/* Returns the parser's validity flag: non-zero whilst the parse may
 * continue, zero once it has failed or been aborted. */
int ne_xml_valid(ne_xml_parser *p)
{
    int still_valid = p->valid;

    return still_valid;
}
508 
/* Destroys the parser object: frees the registered handlers, any
 * elements still on the current branch, the placeholder root element,
 * the underlying expat/libxml parser and finally 'p' itself. */
void ne_xml_destroy(ne_xml_parser *p)
{
    struct element *elm, *up;
    struct handler *hand, *following;

    /* The root element holds the pointer to the base of the handler
     * stack; free every handler from there onwards. */
    hand = p->root->handler;
    while (hand != NULL) {
        following = hand->next;
        ne_free(hand);
        hand = following;
    }

    /* Unwind whatever is left of the branch, stopping at the root. */
    elm = p->current;
    while (elm != p->root) {
        up = elm->parent;
        destroy_element(elm);
        elm = up;
    }

    /* The root is a bare placeholder, so a plain free is enough. */
    ne_free(p->root);

#ifdef HAVE_EXPAT
    XML_ParserFree(p->parser);
    if (p->encoding) ne_free(p->encoding);
#else
    xmlFreeParserCtxt(p->parser);
#endif

    ne_free(p);
}
539 
/* Replaces the parser's error string with a copy of 'msg', bounded by
 * the size of the error buffer. */
void ne_xml_set_error(ne_xml_parser *p, const char *msg)
{
    char *dest = p->error;

    ne_snprintf(dest, ERR_SIZE, "%s", msg);
}
544 
545 #ifdef HAVE_LIBXML
sax_error(void * ctx,const char * msg,...)546 static void sax_error(void *ctx, const char *msg, ...)
547 {
548     ne_xml_parser *p = ctx;
549     va_list ap;
550     char buf[1024];
551 
552     va_start(ap, msg);
553     ne_vsnprintf(buf, 1024, msg, ap);
554     va_end(ap);
555 
556     ne_snprintf(p->error, ERR_SIZE,
557 		_("XML parse error at line %d: %s."),
558 		p->parser->input->line, buf);
559 
560     p->valid = 0;
561 }
562 #endif
563 
/* Returns the parser's current error string.  The pointer refers to
 * the buffer inside 'p' itself. */
const char *ne_xml_get_error(ne_xml_parser *p)
{
    const char *message = p->error;

    return message;
}
568 
569 const char *
ne_xml_get_attr(ne_xml_parser * p,const char ** attrs,const char * nspace,const char * name)570 ne_xml_get_attr(ne_xml_parser *p, const char **attrs,
571 		const char *nspace, const char *name)
572 {
573     int n;
574 
575     for (n = 0; attrs[n] != NULL; n += 2) {
576 	char *pnt = strchr(attrs[n], ':');
577 
578 	if (!nspace && !pnt && strcmp(attrs[n], name) == 0) {
579 	    return attrs[n+1];
580 	} else if (nspace && pnt) {
581 	    /* If a namespace is given, and the local part matches,
582 	     * then resolve the namespace and compare that too. */
583 	    if (strcmp(pnt + 1, name) == 0) {
584 		const char *uri = resolve_nspace(p->current,
585 						 attrs[n], pnt - attrs[n]);
586 		if (uri && strcmp(uri, nspace) == 0)
587 		    return attrs[n+1];
588 	    }
589 	}
590     }
591 
592     return NULL;
593 }
594 
/* Search the first 'maplen' entries of 'map' for one whose name and
 * namespace both equal the given strings.  Returns the id of the
 * first such entry, or 0 if there is none. */
int ne_xml_mapid(const struct ne_xml_idmap map[], size_t maplen,
                 const char *nspace, const char *name)
{
    size_t idx = 0;

    while (idx < maplen) {
        if (strcmp(name, map[idx].name) == 0 &&
            strcmp(nspace, map[idx].nspace) == 0) {
            return map[idx].id;
        }
        idx++;
    }

    return 0;
}
607