1 /*
2 Higher Level Interface to XML Parsers.
3 Copyright (C) 1999-2004, Joe Orton <joe@manyfish.co.uk>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA
19
20 */
21
22 #include "config.h"
23
24 #ifdef HAVE_STDLIB_H
25 #include <stdlib.h>
26 #endif
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30 #ifdef HAVE_STRINGS_H
31 #include <strings.h>
32 #endif
33
34 #include "ne_i18n.h"
35
36 #include "ne_alloc.h"
37 #include "ne_xml.h"
38 #include "ne_utils.h"
39 #include "ne_string.h"
40
41 #if defined(HAVE_EXPAT)
42 /* expat support: */
43 #ifdef HAVE_XMLPARSE_H
44 #include "xmlparse.h"
45 #else
46 #include <expat.h>
47 #endif
48 typedef XML_Char ne_xml_char;
49 #elif defined(HAVE_LIBXML)
50 /* libxml2 support: */
51 #include <libxml/xmlversion.h>
52 #include <libxml/parser.h>
53 typedef xmlChar ne_xml_char;
54
55 #else /* not HAVE_LIBXML */
56 # error need an XML parser
57 #endif /* not HAVE_EXPAT */
58
/* Size in bytes of the parser's error message buffer (the 'error'
 * array in struct ne_xml_parser_s).  Approx. one screen of text: */
#define ERR_SIZE (2048)
61
/* One registered set of element callbacks plus the userdata passed
 * back to them.  Handlers are linked through 'next' in the order in
 * which ne_xml_push_handler() registered them. */
struct handler {
    ne_xml_startelm_cb *startelm_cb; /* start-element callback */
    ne_xml_endelm_cb *endelm_cb; /* end-element callback */
    ne_xml_cdata_cb *cdata_cb; /* character-data callback. */
    void *userdata; /* userdata for the above. */
    struct handler *next; /* next handler in stack. */
};
69
#ifdef HAVE_LIBXML
/* Error callback referenced from the libxml SAX handler table;
 * defined later in this file. */
static void sax_error(void *ctx, const char *msg, ...);
#endif
73
/* One element on the current branch of the document tree.  Elements
 * are created in start_element() and released by destroy_element(). */
struct element {
    const ne_xml_char *nspace; /* namespace URI; not owned by this
                                * element (points at a default_ns or
                                * at a struct namespace 'uri') */
    ne_xml_char *name; /* local part of the element name; freed by
                        * destroy_element() */

    int state; /* opaque state integer */

    /* Namespaces declared in this element */
    ne_xml_char *default_ns; /* A default namespace */
    struct namespace *nspaces; /* List of other namespace scopes */

    struct handler *handler; /* Handler for this element */

    struct element *parent; /* parent element, or NULL */
};
88
/* We pass around a ne_xml_parser as the userdata in the parsing
 * library.  This maintains the current state of the parse and various
 * other bits and bobs.  Within the parse, we store the current branch
 * of the tree, i.e., the current element and all its parents, up to
 * the root, but nothing other than that.
 *
 * Exactly one of the two 'parser' members exists, depending on
 * whether the file is built against expat or libxml. */
struct ne_xml_parser_s {
    struct element *root; /* the root of the document */
    struct element *current; /* current element in the branch */
    struct handler *top_handlers; /* always points at the
                                   * handler on top of the stack. */
    int valid; /* non-zero whilst parse should continue */
    int prune; /* if non-zero, depth within a dead branch */

#ifdef HAVE_EXPAT
    XML_Parser parser; /* the expat parser object */
    char *encoding; /* copy of the encoding named in the XML
                     * declaration (set by decl_handler); may be
                     * NULL */
#else
    xmlParserCtxtPtr parser; /* the libxml push-parser context */
#endif
    char error[ERR_SIZE]; /* most recent error message */
};
110
/* The callback handlers: forward declarations, needed because the
 * libxml SAX handler table below refers to them before they are
 * defined. */
static void start_element(void *userdata, const ne_xml_char *name, const ne_xml_char **atts);
static void end_element(void *userdata, const ne_xml_char *name);
static void char_data(void *userdata, const ne_xml_char *cdata, int len);
/* Looks up the URI bound to a namespace prefix of length 'pfxlen',
 * searching 'elm' and then its ancestors; returns NULL if unbound. */
static const char *resolve_nspace(const struct element *elm,
                                  const char *prefix, size_t pfxlen);
117
/* Linked list of namespace scopes: each node maps one declared
 * prefix to its URI.  Both strings are heap copies which are freed,
 * together with the node, by destroy_element(). */
struct namespace {
    ne_xml_char *name; /* the prefix, i.e. the part after "xmlns:" */
    ne_xml_char *uri; /* the URI bound to that prefix */
    struct namespace *next; /* next declaration on the same element */
};
124
125 #ifdef HAVE_LIBXML
126
/* SAX callback table handed to libxml when the push parser is
 * created.  The initializer is positional, so entries must stay in
 * the order of the xmlSAXHandler members named in the per-line
 * comments.  Only element start/end, character data (including CDATA
 * blocks) and the error hooks are set; everything else is NULL.
 *
 * Could be const as far as we care, but libxml doesn't want that */
static xmlSAXHandler sax_handler = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    start_element, /* startElement */
    end_element, /* endElement */
    NULL, /* reference */
    char_data, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* xmlParserWarning */
    sax_error, /* xmlParserError */
    sax_error, /* fatal error (never called by libxml2?) */
    NULL, /* getParameterEntity */
    char_data /* cdataBlock */
};
156
/* Empty attributes array to mimic expat behaviour: a valid,
 * NULL-terminated array which is substituted when the attribute
 * pointer handed to start_element() is NULL. */
static const char *empty_atts[] = {NULL, NULL};

/* Macro for determining the attributes array to pass to a
 * start-element callback: 'atts' itself (cast to const char **) when
 * it is non-NULL, otherwise empty_atts.  The argument is parenthesized
 * at every use so that an expression argument cannot re-associate
 * with the conditional operator.  Note that 'atts' is evaluated
 * twice. */
#define PASS_ATTS(atts) ((atts) ? (const char **)(atts) : empty_atts)
162
163 #else
164
/* Attributes array to pass to a start-element callback: in this
 * (non-libxml) build the parser's array is passed through with only a
 * cast. */
#define PASS_ATTS(atts) ((const char **)(atts))
166
167 /* XML declaration callback for expat. */
decl_handler(void * userdata,const XML_Char * version,const XML_Char * encoding,int standalone)168 static void decl_handler(void *userdata,
169 const XML_Char *version, const XML_Char *encoding,
170 int standalone)
171 {
172 ne_xml_parser *p = userdata;
173 if (encoding) p->encoding = ne_strdup(encoding);
174 }
175
176 #endif /* HAVE_LIBXML */
177
/* Returns the line number of the input that the underlying parser
 * library is currently positioned at. */
int ne_xml_currentline(ne_xml_parser *p)
{
    int line;

#ifdef HAVE_EXPAT
    line = XML_GetCurrentLineNumber(p->parser);
#else
    line = p->parser->input->line;
#endif

    return line;
}
186
ne_xml_doc_encoding(const ne_xml_parser * p)187 const char *ne_xml_doc_encoding(const ne_xml_parser *p)
188 {
189 #ifdef HAVE_LIBXML
190 return p->parser->encoding;
191 #else
192 return p->encoding;
193 #endif
194 }
195
196 /* Extract the namespace prefix declarations from 'atts'. */
declare_nspaces(ne_xml_parser * p,struct element * elm,const ne_xml_char ** atts)197 static int declare_nspaces(ne_xml_parser *p, struct element *elm,
198 const ne_xml_char **atts)
199 {
200 int n;
201
202 for (n = 0; atts && atts[n]; n += 2) {
203 if (strcasecmp(atts[n], "xmlns") == 0) {
204 /* New default namespace */
205 elm->default_ns = ne_strdup(atts[n+1]);
206 } else if (strncasecmp(atts[n], "xmlns:", 6) == 0) {
207 struct namespace *ns;
208
209 if (atts[n][6] == '\0' || atts[n+1][0] == '\0') {
210 ne_snprintf(p->error, ERR_SIZE,
211 ("XML parse error at line %d: invalid namespace "
212 "declaration"), ne_xml_currentline(p));
213 return -1;
214 }
215
216 /* New namespace scope */
217 ns = ne_calloc(sizeof(*ns));
218 ns->next = elm->nspaces;
219 elm->nspaces = ns;
220 ns->name = ne_strdup(atts[n]+6); /* skip the xmlns= */
221 ns->uri = ne_strdup(atts[n+1]);
222 }
223 }
224
225 return 0;
226 }
227
228 /* Expand an XML qualified name, which may include a namespace prefix
229 * as well as the local part. */
expand_qname(ne_xml_parser * p,struct element * elm,const ne_xml_char * qname)230 static int expand_qname(ne_xml_parser *p, struct element *elm,
231 const ne_xml_char *qname)
232 {
233 const ne_xml_char *pfx;
234
235 pfx = strchr(qname, ':');
236 if (pfx == NULL) {
237 struct element *e = elm;
238
239 /* Find default namespace; guaranteed to terminate as the root
240 * element always has default_ns="". */
241 while (e->default_ns == NULL)
242 e = e->parent;
243
244 elm->name = ne_strdup(qname);
245 elm->nspace = e->default_ns;
246 } else {
247 const char *uri = resolve_nspace(elm, qname, pfx-qname);
248
249 if (uri) {
250 /* The name is everything after the ':' */
251 if (pfx[1] == '\0') {
252 ne_snprintf(p->error, ERR_SIZE,
253 ("XML parse error at line %d: element name missing"
254 "after namespace prefix"), ne_xml_currentline(p));
255 return -1;
256 }
257 elm->name = ne_strdup(pfx+1);
258 elm->nspace = uri;
259 } else {
260 ne_snprintf(p->error, ERR_SIZE,
261 ("XML parse error at line %d: undeclared namespace"),
262 ne_xml_currentline(p));
263 return -1;
264 }
265 }
266 return 0;
267 }
268
269 /* Called with the start of a new element. */
start_element(void * userdata,const ne_xml_char * name,const ne_xml_char ** atts)270 static void start_element(void *userdata, const ne_xml_char *name,
271 const ne_xml_char **atts)
272 {
273 ne_xml_parser *p = userdata;
274 struct element *elm;
275 struct handler *hand;
276 int state = NE_XML_DECLINE;
277
278 if (!p->valid) return;
279
280 if (p->prune) {
281 p->prune++;
282 return;
283 }
284
285 /* Create a new element */
286 elm = ne_calloc(sizeof *elm);
287 elm->parent = p->current;
288 p->current = elm;
289
290 if (declare_nspaces(p, elm, atts) || expand_qname(p, elm, name)) {
291 p->valid = 0;
292 return;
293 }
294
295 /* Find a handler which will accept this element (or abort the parse) */
296 for (hand = elm->parent->handler; hand && state == NE_XML_DECLINE;
297 hand = hand->next) {
298 elm->handler = hand;
299 state = hand->startelm_cb(hand->userdata, elm->parent->state,
300 elm->nspace, elm->name, PASS_ATTS(atts));
301 }
302
303 NE_DEBUG(NE_DBG_XMLPARSE, "XML: start-element (%d, {%s, %s}) => %d\n",
304 elm->parent->state, elm->nspace, elm->name, state);
305
306 if (state > 0)
307 elm->state = state;
308 else if (state == NE_XML_DECLINE)
309 /* prune this branch. */
310 p->prune++;
311 else /* state == NE_XML_ABORT */
312 p->valid = 0;
313 }
314
315 /* Destroys an element structure. */
destroy_element(struct element * elm)316 static void destroy_element(struct element *elm)
317 {
318 struct namespace *this_ns, *next_ns;
319 ne_free(elm->name);
320 /* Free the namespaces */
321 this_ns = elm->nspaces;
322 while (this_ns != NULL) {
323 next_ns = this_ns->next;
324 ne_free(this_ns->name);
325 ne_free(this_ns->uri);
326 ne_free(this_ns);
327 this_ns = next_ns;
328 };
329 if (elm->default_ns)
330 ne_free(elm->default_ns);
331 ne_free(elm);
332 }
333
334 /* cdata SAX callback */
char_data(void * userdata,const ne_xml_char * data,int len)335 static void char_data(void *userdata, const ne_xml_char *data, int len)
336 {
337 ne_xml_parser *p = userdata;
338 struct element *elm = p->current;
339
340 if (!p->valid || p->prune) return;
341
342 if (elm->handler->cdata_cb &&
343 elm->handler->cdata_cb(elm->handler->userdata, elm->state, data, len)) {
344 NE_DEBUG(NE_DBG_XML, "Cdata callback failed.\n");
345 p->valid = 0;
346 }
347 }
348
349 /* Called with the end of an element */
end_element(void * userdata,const ne_xml_char * name)350 static void end_element(void *userdata, const ne_xml_char *name)
351 {
352 ne_xml_parser *p = userdata;
353 struct element *elm = p->current;
354
355 if (!p->valid) return;
356
357 if (p->prune) {
358 if (p->prune-- > 1) return;
359 } else if (elm->handler->endelm_cb &&
360 elm->handler->endelm_cb(elm->handler->userdata, elm->state,
361 elm->nspace, elm->name)) {
362 NE_DEBUG(NE_DBG_XML, "XML: end-element for %d failed.\n", elm->state);
363 p->valid = 0;
364 }
365
366 NE_DEBUG(NE_DBG_XMLPARSE, "XML: end-element (%d, {%s, %s})\n",
367 elm->state, elm->nspace, elm->name);
368
369 /* move back up the tree */
370 p->current = elm->parent;
371 p->prune = 0;
372
373 destroy_element(elm);
374 }
375
376 /* Find a namespace definition for 'prefix' in given element, where
377 * length of prefix is 'pfxlen'. Returns the URI or NULL. */
resolve_nspace(const struct element * elm,const char * prefix,size_t pfxlen)378 static const char *resolve_nspace(const struct element *elm,
379 const char *prefix, size_t pfxlen)
380 {
381 const struct element *s;
382
383 /* Search up the tree. */
384 for (s = elm; s != NULL; s = s->parent) {
385 const struct namespace *ns;
386 /* Iterate over defined spaces on this node. */
387 for (ns = s->nspaces; ns != NULL; ns = ns->next) {
388 if (strlen(ns->name) == pfxlen &&
389 memcmp(ns->name, prefix, pfxlen) == 0)
390 return ns->uri;
391 }
392 }
393
394 return NULL;
395 }
396
ne_xml_create(void)397 ne_xml_parser *ne_xml_create(void)
398 {
399 ne_xml_parser *p = ne_calloc(sizeof *p);
400 /* Initialize other stuff */
401 p->valid = 1;
402 /* Placeholder for the root element */
403 p->current = p->root = ne_calloc(sizeof *p->root);
404 p->root->default_ns = "";
405 p->root->state = 0;
406 strcpy(p->error, _("Unknown error"));
407 #ifdef HAVE_EXPAT
408 p->parser = XML_ParserCreate(NULL);
409 if (p->parser == NULL) {
410 abort();
411 }
412 XML_SetElementHandler(p->parser, start_element, end_element);
413 XML_SetCharacterDataHandler(p->parser, char_data);
414 XML_SetUserData(p->parser, (void *) p);
415 XML_SetXmlDeclHandler(p->parser, decl_handler);
416 #else
417 p->parser = xmlCreatePushParserCtxt(&sax_handler,
418 (void *)p, NULL, 0, NULL);
419 if (p->parser == NULL) {
420 abort();
421 }
422 p->parser->replaceEntities = 1;
423 #endif
424 return p;
425 }
426
/* Registers a new set of callbacks with the parser.  The handler is
 * appended after the most recently registered one; the first handler
 * registered also becomes the handler of the root element, which is
 * where start_element() begins its search. */
void ne_xml_push_handler(ne_xml_parser *p,
                         ne_xml_startelm_cb *startelm_cb,
                         ne_xml_cdata_cb *cdata_cb,
                         ne_xml_endelm_cb *endelm_cb,
                         void *userdata)
{
    struct handler *entry = ne_calloc(sizeof(struct handler));

    entry->userdata = userdata;
    entry->startelm_cb = startelm_cb;
    entry->endelm_cb = endelm_cb;
    entry->cdata_cb = cdata_cb;

    if (p->top_handlers != NULL) {
        /* Chain after the previous registration. */
        p->top_handlers->next = entry;
    } else {
        /* First handler registered: it is the base of the chain. */
        p->root->handler = entry;
    }

    p->top_handlers = entry;
}
450
ne_xml_parse_v(void * userdata,const char * block,size_t len)451 void ne_xml_parse_v(void *userdata, const char *block, size_t len)
452 {
453 ne_xml_parser *p = userdata;
454 /* FIXME: The two XML parsers break all our nice abstraction by
455 * choosing different char *'s. The swine. This cast will come
456 * back and bite us someday, no doubt. */
457 ne_xml_parse(p, block, len);
458 }
459
460 /* Parse the given block of input of length len */
ne_xml_parse(ne_xml_parser * p,const char * block,size_t len)461 void ne_xml_parse(ne_xml_parser *p, const char *block, size_t len)
462 {
463 int ret, flag;
464 /* duck out if it's broken */
465 if (!p->valid) {
466 NE_DEBUG(NE_DBG_XML, "Not parsing %" NE_FMT_SIZE_T " bytes.\n",
467 len);
468 return;
469 }
470 if (len == 0) {
471 flag = -1;
472 block = "";
473 NE_DEBUG(NE_DBG_XML, "Got 0-length buffer, end of document.\n");
474 } else {
475 NE_DEBUG(NE_DBG_XML, "Parsing %" NE_FMT_SIZE_T " length buffer.\n",
476 len);
477 flag = 0;
478 }
479 /* Note, don't write a parser error if !p->valid, since an error
480 * will already have been written in that case. */
481 #ifdef HAVE_EXPAT
482 ret = XML_Parse(p->parser, block, len, flag);
483 NE_DEBUG(NE_DBG_XMLPARSE, "XML_Parse returned %d\n", ret);
484 if (ret == 0 && p->valid) {
485 ne_snprintf(p->error, ERR_SIZE,
486 "XML parse error at line %d: %s",
487 XML_GetCurrentLineNumber(p->parser),
488 XML_ErrorString(XML_GetErrorCode(p->parser)));
489 p->valid = 0;
490 }
491 #else
492 ret = xmlParseChunk(p->parser, block, len, flag);
493 NE_DEBUG(NE_DBG_XMLPARSE, "xmlParseChunk returned %d\n", ret);
494 /* Parse errors are normally caught by the sax_error() callback,
495 * which clears p->valid. */
496 if (p->parser->errNo && p->valid) {
497 ne_snprintf(p->error, ERR_SIZE, "XML parse error at line %d.",
498 ne_xml_currentline(p));
499 p->valid = 0;
500 }
501 #endif
502 }
503
/* Returns the parser's validity flag: non-zero while the parse may
 * continue, zero once it has failed or been aborted. */
int ne_xml_valid(ne_xml_parser *p)
{
    const int still_valid = p->valid;

    return still_valid;
}
508
/* Destroys a parser: frees every registered handler, any elements
 * still left on the current branch, the placeholder root element,
 * the underlying library parser and finally the parser structure. */
void ne_xml_destroy(ne_xml_parser *p)
{
    struct handler *hand = p->root->handler;
    struct element *elm = p->current;

    /* Free up the handlers on the stack: the root element has the
     * pointer to the base of the handler stack. */
    while (hand != NULL) {
        struct handler *following = hand->next;

        ne_free(hand);
        hand = following;
    }

    /* Clean up remaining elements, from the innermost outwards,
     * stopping at the root. */
    while (elm != p->root) {
        struct element *outer = elm->parent;

        destroy_element(elm);
        elm = outer;
    }

    /* free root element */
    ne_free(p->root);

#ifdef HAVE_EXPAT
    XML_ParserFree(p->parser);
    if (p->encoding) ne_free(p->encoding);
#else
    xmlFreeParserCtxt(p->parser);
#endif

    ne_free(p);
}
539
/* Replaces the parser's error message with a copy of 'msg',
 * truncated if necessary to fit the error buffer. */
void ne_xml_set_error(ne_xml_parser *p, const char *msg)
{
    ne_snprintf(p->error, sizeof p->error, "%s", msg);
}
544
545 #ifdef HAVE_LIBXML
sax_error(void * ctx,const char * msg,...)546 static void sax_error(void *ctx, const char *msg, ...)
547 {
548 ne_xml_parser *p = ctx;
549 va_list ap;
550 char buf[1024];
551
552 va_start(ap, msg);
553 ne_vsnprintf(buf, 1024, msg, ap);
554 va_end(ap);
555
556 ne_snprintf(p->error, ERR_SIZE,
557 _("XML parse error at line %d: %s."),
558 p->parser->input->line, buf);
559
560 p->valid = 0;
561 }
562 #endif
563
/* Returns the parser's current error message.  The string lives
 * inside the parser structure, so it must not be freed by the
 * caller. */
const char *ne_xml_get_error(ne_xml_parser *p)
{
    const char *message = p->error;

    return message;
}
568
569 const char *
ne_xml_get_attr(ne_xml_parser * p,const char ** attrs,const char * nspace,const char * name)570 ne_xml_get_attr(ne_xml_parser *p, const char **attrs,
571 const char *nspace, const char *name)
572 {
573 int n;
574
575 for (n = 0; attrs[n] != NULL; n += 2) {
576 char *pnt = strchr(attrs[n], ':');
577
578 if (!nspace && !pnt && strcmp(attrs[n], name) == 0) {
579 return attrs[n+1];
580 } else if (nspace && pnt) {
581 /* If a namespace is given, and the local part matches,
582 * then resolve the namespace and compare that too. */
583 if (strcmp(pnt + 1, name) == 0) {
584 const char *uri = resolve_nspace(p->current,
585 attrs[n], pnt - attrs[n]);
586 if (uri && strcmp(uri, nspace) == 0)
587 return attrs[n+1];
588 }
589 }
590 }
591
592 return NULL;
593 }
594
ne_xml_mapid(const struct ne_xml_idmap map[],size_t maplen,const char * nspace,const char * name)595 int ne_xml_mapid(const struct ne_xml_idmap map[], size_t maplen,
596 const char *nspace, const char *name)
597 {
598 size_t n;
599
600 for (n = 0; n < maplen; n++)
601 if (strcmp(name, map[n].name) == 0 &&
602 strcmp(nspace, map[n].nspace) == 0)
603 return map[n].id;
604
605 return 0;
606 }
607