1 /*
2  *  Copyright (C) 2002-2003,2007 the xine project
3  *
4  *  This file is part of xine, a free video player.
5  *
6  * The xine-lib XML parser is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * The xine-lib XML parser is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public
17  * License along with the Gnome Library; see the file COPYING.LIB.  If not,
18  * write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
19  * Floor, Boston, MA 02110, USA
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #ifdef XINE_COMPILE
27 # include "config.h"
28 #endif
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34 
35 #ifdef _MSC_VER
36 #define snprintf sprintf_s
37 #define strcasecmp stricmp
38 #endif
39 
40 #define LOG_MODULE "xmlparser"
41 #define LOG_VERBOSE
42 /*
43 #define LOG
44 */
45 
46 #include <totem_internal.h>
47 #ifdef XINE_COMPILE
48 #include <xine/xineutils.h>
49 #else
50 #define lprintf(...)
51 #define XINE_MALLOC
52 #endif
53 #include "xmllexer.h"
54 #include "xmlparser.h"
55 
56 
/*
 * Initial size, in bytes, of the token, property-name and node-name buffers
 * allocated by xml_parser_get_node().  The expansions are parenthesised so
 * that the macros stay correct inside larger expressions
 * (e.g. "x / TOKEN_SIZE").
 */
#define TOKEN_SIZE  (64 * 1024)
#define DATA_SIZE   (64 * 1024)
/* Nesting depth at which xml_parser_get_node_internal() gives up. */
#define MAX_RECURSION 26
60 
/* private global variables */
/*
 * Parser instance used by the non-reentrant "ABI compatibility" entry points
 * (xml_parser_init(), xml_parser_build_tree() and
 * xml_parser_build_tree_with_options()).  It is NULL until xml_parser_init()
 * has been called.
 * NOTE(review): declared without `static`, so it has external linkage even
 * though it is labelled private.
 */
xml_parser_t * static_xml_parser;
63 
64 /* private functions */
65 
/*
 * Converts the NUL-terminated string STR to upper case in place, using
 * toupper() from <ctype.h>.
 *
 * Returns STR.
 */
static char * strtoupper(char * str) {
  char *p;

  for (p = str; *p != '\0'; p++) {
    /* toupper() is only defined for EOF and values representable as
     * unsigned char; a plain (possibly negative) char must be converted. */
    *p = (char)toupper((unsigned char)*p);
  }
  return str;
}
75 
new_xml_node(void)76 static xml_node_t * new_xml_node(void) {
77   xml_node_t * new_node;
78 
79   new_node = (xml_node_t*) malloc(sizeof(xml_node_t));
80   new_node->name  = NULL;
81   new_node->data  = NULL;
82   new_node->props = NULL;
83   new_node->child = NULL;
84   new_node->next  = NULL;
85   return new_node;
86 }
87 
/*
 * Name shared by every text node created by xml_parser_append_text().
 * Such nodes are recognised by comparing the name POINTER with this array
 * (not by comparing strings), and free_xml_node() never frees it.
 */
static const char cdata[] = CDATA_MARKER;
89 
/*
 * Releases one node: its name (unless the name is the shared cdata marker),
 * its data, and the node structure itself.  Properties, children and
 * siblings are left alone.
 */
static void free_xml_node(xml_node_t * node) {
  /* the cdata marker is static storage and must never reach free() */
  char *owned_name = (node->name == cdata) ? NULL : node->name;

  free (owned_name);
  free (node->data);
  free (node);
}
96 
new_xml_property(void)97 static xml_property_t *XINE_MALLOC new_xml_property(void) {
98   xml_property_t * new_property;
99 
100   new_property = (xml_property_t*) malloc(sizeof(xml_property_t));
101   new_property->name  = NULL;
102   new_property->value = NULL;
103   new_property->next  = NULL;
104   return new_property;
105 }
106 
/*
 * Releases one property together with its name and value strings.
 * The rest of the list (property->next) is not touched.
 */
static void free_xml_property(xml_property_t * property) {
  char *value = property->value;
  char *name  = property->name;

  free (property);
  free (value);
  free (name);
}
112 
113 /* for ABI compatibility */
xml_parser_init(const char * buf,int size,int mode)114 void xml_parser_init(const char * buf, int size, int mode) {
115   if (static_xml_parser) {
116     xml_parser_finalize_r(static_xml_parser);
117   }
118   static_xml_parser = xml_parser_init_r(buf, size, mode);
119 }
120 
xml_parser_init_r(const char * buf,int size,int mode)121 xml_parser_t *xml_parser_init_r(const char * buf, int size, int mode) {
122   xml_parser_t *xml_parser = malloc(sizeof(*xml_parser));
123   xml_parser->lexer = lexer_init_r(buf, size);
124   xml_parser->mode = mode;
125   return xml_parser;
126 }
127 
/*
 * Releases a parser created by xml_parser_init_r(): finalizes its lexer and
 * frees the parser structure.  Passing NULL is a no-op, like free().
 */
void xml_parser_finalize_r(xml_parser_t *xml_parser) {
  if (!xml_parser)
    return;

  lexer_finalize_r(xml_parser->lexer);
  free(xml_parser);
}
132 
/*
 * Frees a whole property list, starting at CURRENT_PROPERTY (may be NULL).
 *
 * The list is walked iteratively so that stack usage does not grow with the
 * number of attributes found in the document.
 */
static void xml_parser_free_props(xml_property_t *current_property) {
  while (current_property) {
    /* fetch the successor before the element is released */
    xml_property_t *next_property = current_property->next;

    free_xml_property(current_property);
    current_property = next_property;
  }
}
143 
/*
 * Frees CURRENT_NODE together with its properties and all of its children.
 *
 * current_node  node to free; NULL is accepted and ignored
 * free_next     when non-zero, every following sibling (current_node->next
 *               chain) is freed as well; siblings are walked in a loop and
 *               each one is freed with free_next == 0
 */
static void xml_parser_free_tree_rec(xml_node_t *current_node, int free_next) {
  if (!current_node)
    return;

  /* only log once the pointer is known to be valid */
  lprintf("xml_parser_free_tree_rec: %s\n", current_node->name);

  /* properties */
  xml_parser_free_props(current_node->props);

  /* child nodes */
  if (current_node->child) {
    lprintf("xml_parser_free_tree_rec: child\n");
    xml_parser_free_tree_rec(current_node->child, 1);
  }

  /* next nodes */
  if (free_next) {
    xml_node_t *sibling = current_node->next;

    while (sibling) {
      /* remember the successor: the sibling is gone after the call */
      xml_node_t *after = sibling->next;

      lprintf("xml_parser_free_tree_rec: next\n");
      xml_parser_free_tree_rec(sibling, 0);
      sibling = after;
    }
  }

  free_xml_node(current_node);
}
175 
/*
 * Frees CURRENT_NODE, its properties, its children and all of its following
 * siblings.
 */
void xml_parser_free_tree(xml_node_t *current_node) {
  const int include_siblings = 1;

  lprintf("xml_parser_free_tree\n");
  xml_parser_free_tree_rec(current_node, include_siblings);
}
180 
/*
 * States of the node parser.
 *
 * Every <?foo ...?> state sits exactly 7 positions after its <foo ...>
 * counterpart (STATE_Q_x == STATE_x + 7).  The Q_STATE() macro depends on
 * that constant distance, so the values are spelled out explicitly.
 */
typedef enum {
  STATE_IDLE               = 0,
  /* <foo ...> */
  STATE_NODE               = 1,
  STATE_ATTRIBUTE          = 2,
  STATE_NODE_CLOSE         = 3,
  STATE_TAG_TERM           = 4,
  STATE_ATTRIBUTE_EQUALS   = 5,
  STATE_STRING             = 6,
  STATE_TAG_TERM_IGNORE    = 7,
  /* <?foo ...?> */
  STATE_Q_NODE             = 8,
  STATE_Q_ATTRIBUTE        = 9,
  STATE_Q_NODE_CLOSE       = 10,
  STATE_Q_TAG_TERM         = 11,
  STATE_Q_ATTRIBUTE_EQUALS = 12,
  STATE_Q_STRING           = 13,
  /* Others */
  STATE_COMMENT            = 14,
  STATE_DOCTYPE            = 15,
  STATE_CDATA              = 16,
} parser_state_t;
204 
/*
 * Adds TEXT to the node being built.
 *
 * node     element that owns the text
 * subnode  most recently added child of NODE, or NULL if it has none yet
 * text     text to add; NULL or "" is a no-op
 * flags    parser flags; only XML_PARSER_MULTI_TEXT is looked at here
 *
 * With XML_PARSER_MULTI_TEXT and an existing child, the text goes into a
 * text node in the child list: it is appended to SUBNODE if that already is
 * a text node, otherwise a new text node is linked after SUBNODE.
 * In every other case the text is appended to NODE->data; when NODE->data is
 * still empty, leading white space of TEXT is dropped first.
 *
 * If memory runs out the new text is dropped and the existing data is kept.
 *
 * Returns the node that is now the most recent child (SUBNODE or the newly
 * created text node).
 */
static xml_node_t *xml_parser_append_text (xml_node_t *node, xml_node_t *subnode, const char *text, int flags)
{
  if (!text || !*text)
    return subnode; /* empty string -> nothing to do */

  if ((flags & XML_PARSER_MULTI_TEXT) && subnode) {
    /* we have a subtree, so we can't use node->data */
    if (subnode->name == cdata) {
      /* most recent node is CDATA - append to it */
      char *newtext;

      /* on failure asprintf() leaves newtext undefined: keep the old text */
      if (asprintf (&newtext, "%s%s", subnode->data ? subnode->data : "", text) >= 0) {
        free (subnode->data);
        subnode->data = newtext;
      }
    } else {
      /* most recent node is not CDATA - add a sibling */
      xml_node_t *text_node = new_xml_node ();

      if (text_node) {
        text_node->name = (char*) cdata; /* we never free cdata */
        text_node->data = strdup (text);
        subnode->next = text_node;
        subnode = text_node;
      }
    }
  } else if (node->data) {
    /* "no" subtree, but we have existing text - append to it */
    char *newtext;

    if (asprintf (&newtext, "%s%s", node->data, text) >= 0) {
      free (node->data);
      node->data = newtext;
    }
  } else {
    /* no text, "no" subtree - duplicate & assign */
    /* isspace() needs an unsigned char value, plain char may be negative */
    while (isspace ((unsigned char) *text))
      ++text;
    if (*text)
      node->data = strdup (text);
  }

  return subnode;
}
241 
/*
 * Picks the successor state for code shared between the <foo ...> and
 * <?foo ...?> state families.  CURRENT and NEW are the unprefixed suffixes of
 * <foo ...> states; the result is STATE_<NEW> shifted by the distance between
 * the local variable `state` and STATE_<CURRENT>, so a STATE_Q_<CURRENT>
 * input yields STATE_Q_<NEW>.  Requires a variable named `state` in scope and
 * relies on the ordering of parser_state_t.
 */
#define Q_STATE(CURRENT,NEW) (STATE_##NEW + state - STATE_##CURRENT)
243 
244 
xml_parser_get_node_internal(xml_parser_t * xml_parser,char ** token_buffer,int * token_buffer_size,char ** pname_buffer,int * pname_buffer_size,char ** nname_buffer,int * nname_buffer_size,xml_node_t * current_node,char * root_names[],int rec,int flags)245 static int xml_parser_get_node_internal (xml_parser_t *xml_parser,
246 				 char ** token_buffer, int * token_buffer_size,
247                                  char ** pname_buffer, int * pname_buffer_size,
248                                  char ** nname_buffer, int * nname_buffer_size,
249                                  xml_node_t *current_node, char *root_names[], int rec, int flags)
250 {
251   char *tok = *token_buffer;
252   char *property_name = *pname_buffer;
253   char *node_name = *nname_buffer;
254   parser_state_t state = STATE_IDLE;
255   int res = 0;
256   int parse_res;
257   int bypass_get_token = 0;
258   int retval = 0; /* used when state==4; non-0 if there are missing </...> */
259   xml_node_t *subtree = NULL;
260   xml_node_t *current_subtree = NULL;
261   xml_property_t *current_property = NULL;
262   xml_property_t *properties = NULL;
263 
264   if (rec < MAX_RECURSION) {
265 
266     memset (tok, 0, *token_buffer_size);
267 
268     while ((bypass_get_token) || (res = lexer_get_token_d_r(xml_parser->lexer, token_buffer, token_buffer_size, 0)) != T_ERROR) {
269       tok = *token_buffer;
270       bypass_get_token = 0;
271       lprintf("info: %d - %d : '%s'\n", state, res, tok);
272 
273       switch (state) {
274       case STATE_IDLE:
275 	switch (res) {
276 	case (T_EOL):
277 	case (T_SEPAR):
278 	  /* do nothing */
279 	  break;
280 	case (T_EOF):
281 	  return retval; /* normal end */
282 	  break;
283 	case (T_M_START_1):
284 	  state = STATE_NODE;
285 	  break;
286 	case (T_M_START_2):
287 	  state = STATE_NODE_CLOSE;
288 	  break;
289 	case (T_C_START):
290 	  state = STATE_COMMENT;
291 	  break;
292 	case (T_TI_START):
293 	  state = STATE_Q_NODE;
294 	  break;
295 	case (T_DOCTYPE_START):
296 	  state = STATE_DOCTYPE;
297 	  break;
298 	case (T_CDATA_START):
299 	  state = STATE_CDATA;
300 	  break;
301 	case (T_DATA):
302 	  /* current data */
303 	  {
304 	    char *decoded = lexer_decode_entities (tok);
305 	    current_subtree = xml_parser_append_text (current_node, current_subtree, decoded, flags);
306 	    free (decoded);
307 	  }
308 	  lprintf("info: node data : %s\n", current_node->data);
309 	  break;
310 	default:
311 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
312 	  return -1;
313 	  break;
314 	}
315 	break;
316 
317       case STATE_NODE:
318       case STATE_Q_NODE:
319 	switch (res) {
320 	case (T_IDENT):
321 	  properties = NULL;
322 	  current_property = NULL;
323 
324 	  /* save node name */
325 	  if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
326 	    strtoupper(tok);
327 	  }
328 	  if (state == STATE_Q_NODE) {
329 	    asprintf (&node_name, "?%s", tok);
330 	    free (*nname_buffer);
331 	    *nname_buffer = node_name;
332 	    *nname_buffer_size = strlen (node_name) + 1;
333 	    state = STATE_Q_ATTRIBUTE;
334 	  } else {
335 	    free (*nname_buffer);
336 	    *nname_buffer = node_name = strdup (tok);
337 	    *nname_buffer_size = strlen (node_name) + 1;
338 	    state = STATE_ATTRIBUTE;
339 	  }
340 	  lprintf("info: current node name \"%s\"\n", node_name);
341 	  break;
342 	default:
343 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
344 	  return -1;
345 	  break;
346 	}
347 	break;
348 
349       case STATE_ATTRIBUTE:
350 	switch (res) {
351 	case (T_EOL):
352 	case (T_SEPAR):
353 	  /* nothing */
354 	  break;
355 	case (T_M_STOP_1):
356 	  /* new subtree */
357 	  subtree = new_xml_node();
358 
359 	  /* set node name */
360 	  subtree->name = strdup(node_name);
361 
362 	  /* set node propertys */
363 	  subtree->props = properties;
364 	  lprintf("info: rec %d new subtree %s\n", rec, node_name);
365 	  root_names[rec + 1] = strdup (node_name);
366 	  parse_res = xml_parser_get_node_internal (xml_parser, token_buffer, token_buffer_size,
367 						    pname_buffer, pname_buffer_size,
368 						    nname_buffer, nname_buffer_size,
369 						    subtree, root_names, rec + 1, flags);
370 	  tok = *token_buffer;
371 	  free (root_names[rec + 1]);
372 	  if (parse_res == -1 || parse_res > 0) {
373 	    return parse_res;
374 	  }
375 	  if (current_subtree == NULL) {
376 	    current_node->child = subtree;
377 	    current_subtree = subtree;
378 	  } else {
379 	    current_subtree->next = subtree;
380 	    current_subtree = subtree;
381 	  }
382 	  if (parse_res < -1) {
383 	    /* badly-formed XML (missing close tag) */
384 	    return parse_res + 1 + (parse_res == -2);
385 	  }
386 	  state = STATE_IDLE;
387 	  break;
388 	case (T_M_STOP_2):
389 	  /* new leaf */
390 	  /* new subtree */
391 	  new_leaf:
392 	  subtree = new_xml_node();
393 
394 	  /* set node name */
395 	  subtree->name = strdup (node_name);
396 
397 	  /* set node propertys */
398 	  subtree->props = properties;
399 
400 	  lprintf("info: rec %d new subtree %s\n", rec, node_name);
401 
402 	  if (current_subtree == NULL) {
403 	    current_node->child = subtree;
404 	    current_subtree = subtree;
405 	  } else {
406 	    current_subtree->next = subtree;
407 	    current_subtree = subtree;
408 	  }
409 	  state = STATE_IDLE;
410 	  break;
411 	case (T_IDENT):
412 	  /* save property name */
413 	  new_prop:
414 	  if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
415 	    strtoupper(tok);
416 	  }
417 	  /* make sure the buffer for the property name is big enough */
418 	  if (*token_buffer_size > *pname_buffer_size) {
419 	    char *tmp_prop;
420 	    *pname_buffer_size = *token_buffer_size;
421 	    tmp_prop = realloc (*pname_buffer, *pname_buffer_size);
422 	    if (!tmp_prop)
423 	      return -1;
424 	    *pname_buffer = tmp_prop;
425 	    property_name = tmp_prop;
426 	  } else {
427 	    property_name = *pname_buffer;
428 	  }
429 	  strcpy(property_name, tok);
430 	  state = Q_STATE(ATTRIBUTE, ATTRIBUTE_EQUALS);
431 	  lprintf("info: current property name \"%s\"\n", property_name);
432 	  break;
433 	default:
434 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
435 	  return -1;
436 	  break;
437 	}
438 	break;
439 
440       case STATE_Q_ATTRIBUTE:
441 	switch (res) {
442 	case (T_EOL):
443 	case (T_SEPAR):
444 	  /* nothing */
445 	  break;
446 	case (T_TI_STOP):
447 	  goto new_leaf;
448 	case (T_IDENT):
449 	  goto new_prop;
450 	default:
451 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
452 	  return -1;
453 	  break;
454 	}
455 	break;
456 
457       case STATE_NODE_CLOSE:
458 	switch (res) {
459 	case (T_IDENT):
460 	  /* must be equal to root_name */
461 	  if (xml_parser->mode == XML_PARSER_CASE_INSENSITIVE) {
462 	    strtoupper(tok);
463 	  }
464 	  if (strcmp(tok, root_names[rec]) == 0) {
465 	    state = STATE_TAG_TERM;
466 	  } else if (flags & XML_PARSER_RELAXED) {
467 	    int r = rec;
468 	    while (--r >= 0)
469 	      if (strcmp(tok, root_names[r]) == 0) {
470 		lprintf("warning: wanted %s, got %s - assuming missing close tags\n", root_names[rec], tok);
471 		retval = r - rec - 1; /* -1 - (no. of implied close tags) */
472 		state = STATE_TAG_TERM;
473 		break;
474 	      }
475 	    /* relaxed parsing, ignoring extra close tag (but we don't handle out-of-order) */
476 	    if (r < 0) {
477 	      lprintf("warning: extra close tag %s - ignoring\n", tok);
478 	      state = STATE_TAG_TERM_IGNORE;
479 	    }
480 	  }
481 	  else
482 	  {
483 	    lprintf("error: xml struct, tok=%s, waited_tok=%s\n", tok, root_names[rec]);
484 	    return -1;
485 	  }
486 	  break;
487 	default:
488 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
489 	  return -1;
490 	  break;
491 	}
492 	break;
493 
494 				/* > expected */
495       case STATE_TAG_TERM:
496 	switch (res) {
497 	case (T_M_STOP_1):
498 	  return retval;
499 	  break;
500 	default:
501 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
502 	  return -1;
503 	  break;
504 	}
505 	break;
506 
507 				/* = or > or ident or separator expected */
508       case STATE_ATTRIBUTE_EQUALS:
509 	switch (res) {
510 	case (T_EOL):
511 	case (T_SEPAR):
512 	  /* do nothing */
513 	  break;
514 	case (T_EQUAL):
515 	  state = STATE_STRING;
516 	  break;
517 	case (T_IDENT):
518 	  bypass_get_token = 1; /* jump to state 2 without get a new token */
519 	  state = STATE_ATTRIBUTE;
520 	  break;
521 	case (T_M_STOP_1):
522 	  /* add a new property without value */
523 	  if (current_property == NULL) {
524 	    properties = new_xml_property();
525 	    current_property = properties;
526 	  } else {
527 	    current_property->next = new_xml_property();
528 	    current_property = current_property->next;
529 	  }
530 	  current_property->name = strdup (property_name);
531 	  lprintf("info: new property %s\n", current_property->name);
532 	  bypass_get_token = 1; /* jump to state 2 without get a new token */
533 	  state = STATE_ATTRIBUTE;
534 	  break;
535 	default:
536 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
537 	  return -1;
538 	  break;
539 	}
540 	break;
541 
542 				/* = or ?> or ident or separator expected */
543       case STATE_Q_ATTRIBUTE_EQUALS:
544 	switch (res) {
545 	case (T_EOL):
546 	case (T_SEPAR):
547 	  /* do nothing */
548 	  break;
549 	case (T_EQUAL):
550 	  state = STATE_Q_STRING;
551 	  break;
552 	case (T_IDENT):
553 	  bypass_get_token = 1; /* jump to state 2 without get a new token */
554 	  state = STATE_Q_ATTRIBUTE;
555 	  break;
556 	case (T_TI_STOP):
557 	  /* add a new property without value */
558 	  if (current_property == NULL) {
559 	    properties = new_xml_property();
560 	    current_property = properties;
561 	  } else {
562 	    current_property->next = new_xml_property();
563 	    current_property = current_property->next;
564 	  }
565 	  current_property->name = strdup (property_name);
566 	  lprintf("info: new property %s\n", current_property->name);
567 	  bypass_get_token = 1; /* jump to state 2 without get a new token */
568 	  state = STATE_Q_ATTRIBUTE;
569 	  break;
570 	default:
571 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
572 	  return -1;
573 	  break;
574 	}
575 	break;
576 
577 				/* string or ident or separator expected */
578       case STATE_STRING:
579       case STATE_Q_STRING:
580 	switch (res) {
581 	case (T_EOL):
582 	case (T_SEPAR):
583 	  /* do nothing */
584 	  break;
585 	case (T_STRING):
586 	case (T_IDENT):
587 	  /* add a new property */
588 	  if (current_property == NULL) {
589 	    properties = new_xml_property();
590 	    current_property = properties;
591 	  } else {
592 	    current_property->next = new_xml_property();
593 	    current_property = current_property->next;
594 	  }
595 	  current_property->name = strdup(property_name);
596 	  current_property->value = lexer_decode_entities(tok);
597 	  lprintf("info: new property %s=%s\n", current_property->name, current_property->value);
598 	  state = Q_STATE(STRING, ATTRIBUTE);
599 	  break;
600 	default:
601 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
602 	  return -1;
603 	  break;
604 	}
605 	break;
606 
607 				/* --> expected */
608       case STATE_COMMENT:
609 	switch (res) {
610 	case (T_C_STOP):
611 	  state = STATE_IDLE;
612 	  break;
613 	default:
614 	  break;
615 	}
616 	break;
617 
618 				/* > expected */
619       case STATE_DOCTYPE:
620 	switch (res) {
621 	case (T_M_STOP_1):
622 	  state = 0;
623 	  break;
624 	default:
625 	  break;
626 	}
627 	break;
628 
629 				/* ]]> expected */
630       case STATE_CDATA:
631 	switch (res) {
632 	case (T_CDATA_STOP):
633 	  current_subtree = xml_parser_append_text (current_node, current_subtree, tok, flags);
634 	  lprintf("info: node cdata : %s\n", tok);
635 	  state = STATE_IDLE;
636 	  break;
637         default:
638 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
639 	  return -1;
640 	  break;
641 	}
642 	break;
643 
644 				/* > expected (following unmatched "</...") */
645       case STATE_TAG_TERM_IGNORE:
646 	switch (res) {
647 	case (T_M_STOP_1):
648 	  state = STATE_IDLE;
649 	  break;
650 	default:
651 	  lprintf("error: unexpected token \"%s\", state %d\n", tok, state);
652 	  return -1;
653 	  break;
654 	}
655 	break;
656 
657 
658       case STATE_Q_NODE_CLOSE:
659       case STATE_Q_TAG_TERM:
660       default:
661 	lprintf("error: unknown parser state, state=%d\n", state);
662 	return -1;
663       }
664     }
665     /* lex error */
666     lprintf("error: lexer error\n");
667     return -1;
668   } else {
669     /* max recursion */
670     lprintf("error: max recursion\n");
671     return -1;
672   }
673 }
674 
/*
 * Parses the whole input of XML_PARSER into CURRENT_NODE.
 *
 * Allocates the token, property-name and node-name work buffers (TOKEN_SIZE
 * bytes each, zero-filled), runs xml_parser_get_node_internal() at depth 0
 * and releases the buffers again.
 *
 * Returns the result of xml_parser_get_node_internal(), or -1 if one of the
 * work buffers cannot be allocated.
 */
static int xml_parser_get_node (xml_parser_t *xml_parser, xml_node_t *current_node, int flags)
{
  int res = -1;
  int token_buffer_size = TOKEN_SIZE;
  int pname_buffer_size = TOKEN_SIZE;
  int nname_buffer_size = TOKEN_SIZE;
  char *token_buffer = calloc(1, token_buffer_size);
  char *pname_buffer = calloc(1, pname_buffer_size);
  char *nname_buffer = calloc(1, nname_buffer_size);
  char *root_names[MAX_RECURSION + 1];
  root_names[0] = (char*) ""; /* xml_parser_get_node_internal() only frees names which it allocates */

  /* the parser writes into all three buffers, so none may be missing */
  if (token_buffer && pname_buffer && nname_buffer) {
    res = xml_parser_get_node_internal (xml_parser,
                                        &token_buffer, &token_buffer_size,
                                        &pname_buffer, &pname_buffer_size,
                                        &nname_buffer, &nname_buffer_size,
                                        current_node, root_names, 0, flags);
  }

  free (token_buffer);
  free (pname_buffer);
  free (nname_buffer);

  return res;
}
699 
700 /* for ABI compatibility */
xml_parser_build_tree_with_options(xml_node_t ** root_node,int flags)701 int xml_parser_build_tree_with_options(xml_node_t **root_node, int flags) {
702   return xml_parser_build_tree_with_options_r(static_xml_parser, root_node, flags);
703 }
704 
/*
 * Parses the input of XML_PARSER and stores the resulting tree in *ROOT_NODE.
 *
 * xml_parser  parser created by xml_parser_init_r()
 * root_node   receives the root element on success; untouched on failure
 * flags       parser flags, passed on to the node parser
 *
 * Top-level text (CDATA) nodes are discarded.  The document is accepted when,
 * after any leading <?...?> nodes, exactly one more top-level node remains.
 * That node becomes the root; the <?...?> nodes that preceded it are chained
 * after it through its `next` pointer.
 *
 * Returns 0 on success, -1 on failure.  The tree must be released with
 * xml_parser_free_tree().
 */
int xml_parser_build_tree_with_options_r(xml_parser_t *xml_parser, xml_node_t **root_node, int flags) {
  xml_node_t *tmp_node, *pri_node, *q_node;
  int res;

  if (!xml_parser || !root_node)
    return -1;

  tmp_node = new_xml_node();
  if (!tmp_node)
    return -1;

  /* The parse status itself is not used: whatever was attached to tmp_node
   * is examined below, even if parsing stopped early. */
  (void) xml_parser_get_node(xml_parser, tmp_node, flags);

  /* delete any top-level [CDATA] nodes */
  pri_node = tmp_node->child;
  q_node = NULL; /* last node that was kept */
  while (pri_node) {
    xml_node_t *next = pri_node->next;

    if (pri_node->name == cdata) {
      /* unlink before freeing, including when it is the first child */
      if (q_node)
        q_node->next = next;
      else
        tmp_node->child = next;
      free_xml_node (pri_node);
    } else {
      q_node = pri_node;
    }
    pri_node = next;
  }

  /* find first non-<?...?> node */
  for (pri_node = tmp_node->child, q_node = NULL;
       pri_node && pri_node->name[0] == '?';
       pri_node = pri_node->next)
    q_node = pri_node; /* last <?...?> node (eventually), or NULL */

  if (pri_node && !pri_node->next) {
    /* move the tail to the head (for compatibility reasons) */
    if (q_node) {
      pri_node->next = tmp_node->child;
      q_node->next = NULL;
    }
    *root_node = pri_node;
    free_xml_node(tmp_node);
    res = 0;
  } else {
    lprintf("error: xml struct\n");
    xml_parser_free_tree(tmp_node);
    res = -1;
  }
  return res;
}
752 
753 /* for ABI compatibility */
xml_parser_build_tree(xml_node_t ** root_node)754 int xml_parser_build_tree(xml_node_t **root_node) {
755   return xml_parser_build_tree_with_options_r (static_xml_parser, root_node, 0);
756 }
757 
/*
 * Builds the tree for XML_PARSER with flags set to 0; see
 * xml_parser_build_tree_with_options_r() for the result.
 */
int xml_parser_build_tree_r(xml_parser_t *xml_parser, xml_node_t **root_node) {
  const int no_flags = 0;

  return xml_parser_build_tree_with_options_r(xml_parser, root_node, no_flags);
}
761 
xml_parser_get_property(const xml_node_t * node,const char * name)762 const char *xml_parser_get_property (const xml_node_t *node, const char *name) {
763 
764   xml_property_t *prop;
765 
766   prop = node->props;
767   while (prop) {
768 
769     lprintf ("looking for %s in %s\n", name, prop->name);
770 
771     if (!strcasecmp (prop->name, name)) {
772       lprintf ("found it. value=%s\n", prop->value);
773       return prop->value;
774     }
775 
776     prop = prop->next;
777   }
778 
779   return NULL;
780 }
781 
/*
 * Returns the property NAME of NODE as a decimal integer.
 *
 * Leading white space and an optional sign are accepted and anything after
 * the digits is ignored.  DEF_VALUE is returned when the property is
 * missing, has no value, does not start with a number, or holds a number
 * that does not fit in an int.
 */
int xml_parser_get_property_int (const xml_node_t *node, const char *name,
				 int def_value) {

  const char *v;
  char       *end;
  long        parsed;

  v = xml_parser_get_property (node, name);

  if (!v)
    return def_value;

  /* strtol() reports overflow through errno instead of invoking undefined
   * behaviour, which scanf("%d") does for out-of-range input */
  errno = 0;
  parsed = strtol (v, &end, 10);

  if (end == v)
    return def_value; /* no digits at all */
  if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX)
    return def_value; /* not representable as int */

  return (int) parsed;
}
798 
/*
 * Returns the property NAME of NODE as a boolean: 1 if its value equals
 * "true" (compared without regard to case), 0 for any other value, and
 * DEF_VALUE when the property is missing or has no value.
 */
int xml_parser_get_property_bool (const xml_node_t *node, const char *name,
				  int def_value) {
  const char *v = xml_parser_get_property (node, name);

  if (v == NULL)
    return def_value;

  return (strcasecmp (v, "true") == 0) ? 1 : 0;
}
811 
/*
 * Escapes S for use in XML, or only measures the result.
 *
 * buf         destination, large enough for the escaped string including the
 *             terminating NUL; NULL to only compute the required size
 * s           NUL-terminated input
 * quote_type  XML_ESCAPE_DOUBLE_QUOTE escapes '"' as &quot;,
 *             XML_ESCAPE_SINGLE_QUOTE escapes '\'' as &apos;; the other quote
 *             character is copied unchanged
 *
 * '&', '<' and '>' become &amp;, &lt; and &gt;.  Control characters below
 * 0x20 other than tab and newline, and 0x7F, become numeric references
 * (&#N;).  Every other byte is copied unchanged.
 *
 * Returns the size of the escaped string in bytes, including the NUL.
 */
static int xml_escape_string_internal (char *buf, const char *s,
				       xml_escape_quote_t quote_type)
{
  int c, length = 0;

  while ((c = *s++ & 0xFF)) {
    const char *entity = NULL; /* replacement text, NULL = copy the byte */
    char numeric[8];           /* room for "&#127;" plus NUL */

    switch (c)
    {
    case '"':
      if (quote_type == XML_ESCAPE_DOUBLE_QUOTE)
        entity = "&quot;";
      break;
    case '\'':
      if (quote_type == XML_ESCAPE_SINGLE_QUOTE)
        entity = "&apos;";
      break;
    case '&':  entity = "&amp;"; break;
    case '<':  entity = "&lt;";  break;
    case '>':  entity = "&gt;";  break;
    case '\t':
    case '\n':
      break;
    default:
      if (c < ' ' || c == 127) {
        snprintf (numeric, sizeof (numeric), "&#%d;", c);
        entity = numeric;
      }
      break;
    }

    if (entity) {
      size_t entity_len = strlen (entity);

      /* buf is only offset when it is a real buffer: adding a non-zero
       * offset to a null pointer is undefined */
      if (buf)
        memcpy (buf + length, entity, entity_len);
      length += (int) entity_len;
    } else {
      if (buf)
        buf[length] = (char) c;
      ++length;
    }
  }

  if (buf)
    buf[length] = 0;
  return length + 1;
}
839 
/*
 * Returns a newly allocated copy of S with XML special characters escaped
 * (see xml_escape_string_internal() for the rules), or NULL if the buffer
 * cannot be allocated.  The caller frees the result.
 */
char *xml_escape_string (const char *s, xml_escape_quote_t quote_type)
{
  /* first pass measures, second pass writes */
  int needed = xml_escape_string_internal (NULL, s, quote_type);
  char *escaped = calloc (1, needed);

  if (escaped == NULL)
    return NULL;

  xml_escape_string_internal (escaped, s, quote_type);
  return escaped;
}
845 
xml_parser_dump_node(const xml_node_t * node,int indent)846 static void xml_parser_dump_node (const xml_node_t *node, int indent) {
847   size_t l;
848 
849   xml_property_t *p;
850   xml_node_t     *n;
851 
852   printf ("%*s<%s ", indent, "", node->name);
853 
854   l = strlen (node->name);
855 
856   p = node->props;
857   while (p) {
858     char *value = xml_escape_string (p->value, XML_ESCAPE_SINGLE_QUOTE);
859     printf ("%s='%s'", p->name, value);
860     free (value);
861     p = p->next;
862     if (p) {
863       printf ("\n%*s", indent + 2 + (int) l, "");
864     }
865   }
866   printf (">\n");
867 
868   n = node->child;
869   while (n) {
870 
871     xml_parser_dump_node (n, indent+2);
872 
873     n = n->next;
874   }
875 
876   printf ("%*s</%s>\n", indent, "", node->name);
877 }
878 
xml_parser_dump_tree(const xml_node_t * node)879 void xml_parser_dump_tree (const xml_node_t *node) {
880   do {
881     xml_parser_dump_node (node, 0);
882     node = node->next;
883   } while (node);
884 }
885 
886 #ifdef XINE_XML_PARSER_TEST
887 #include <sys/mman.h>
888 #include <sys/types.h>
889 #include <sys/stat.h>
890 #include <fcntl.h>
891 
/*
 * Stand-in for xine's allocator in the stand-alone test build: a plain
 * malloc() of SIZE bytes.
 */
void *xine_xmalloc (size_t size)
{
  void *block = malloc (size);

  return block;
}
896 
main(int argc,char ** argv)897 int main (int argc, char **argv)
898 {
899   int i, ret = 0;
900   for (i = 1; argv[i]; ++i)
901   {
902     xml_node_t *tree;
903     int fd;
904     void *buf;
905     struct stat st;
906 
907     if (stat (argv[i], &st))
908     {
909       perror (argv[i]);
910       ret = 1;
911       continue;
912     }
913     if (!S_ISREG (st.st_mode))
914     {
915       printf ("%s: not a file\n", argv[i]);
916       ret = 1;
917       continue;
918     }
919     fd = open (argv[i], O_RDONLY);
920     if (!fd)
921     {
922       perror (argv[i]);
923       ret = 1;
924       continue;
925     }
926     buf = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
927     if (buf == MAP_FAILED)
928     {
929       perror (argv[i]);
930       if (close (fd))
931         perror (argv[i]);
932       ret = 1;
933       continue;
934     }
935 
936     xml_parser_init (buf, st.st_size, 0);
937     if (!xml_parser_build_tree (&tree))
938     {
939       puts (argv[i]);
940       xml_parser_dump_tree (tree);
941       xml_parser_free_tree (tree);
942     }
943     else
944       printf ("%s: parser failure\n", argv[i]);
945 
946     if (close (fd))
947     {
948       perror (argv[i]);
949       ret = 1;
950     }
951   }
952   return ret;
953 }
954 #endif
955