1 /* $Id$ */
2 /*
3  * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4  * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 #include <pjlib-util/xml.h>
21 #include <pjlib-util/scanner.h>
22 #include <pj/except.h>
23 #include <pj/pool.h>
24 #include <pj/string.h>
25 #include <pj/log.h>
26 #include <pj/os.h>
27 
/* Exception ID raised by on_syntax_error() to abort an XML parse. */
#define EX_SYNTAX_ERROR	12
/* Sender name passed to PJ_LOG for messages emitted from this file. */
#define THIS_FILE	"xml.c"
30 
on_syntax_error(struct pj_scanner * scanner)31 static void on_syntax_error(struct pj_scanner *scanner)
32 {
33     PJ_UNUSED_ARG(scanner);
34     PJ_THROW(EX_SYNTAX_ERROR);
35 }
36 
alloc_node(pj_pool_t * pool)37 static pj_xml_node *alloc_node( pj_pool_t *pool )
38 {
39     pj_xml_node *node;
40 
41     node = PJ_POOL_ZALLOC_T(pool, pj_xml_node);
42     pj_list_init( &node->attr_head );
43     pj_list_init( &node->node_head );
44 
45     return node;
46 }
47 
alloc_attr(pj_pool_t * pool)48 static pj_xml_attr *alloc_attr( pj_pool_t *pool )
49 {
50     return PJ_POOL_ZALLOC_T(pool, pj_xml_attr);
51 }
52 
53 /* This is a recursive function! */
xml_parse_node(pj_pool_t * pool,pj_scanner * scanner)54 static pj_xml_node *xml_parse_node( pj_pool_t *pool, pj_scanner *scanner)
55 {
56     pj_xml_node *node;
57     pj_str_t end_name;
58 
59     PJ_CHECK_STACK();
60 
61     if (*scanner->curptr != '<')
62 	on_syntax_error(scanner);
63 
64     /* Handle Processing Instructino (PI) construct (i.e. "<?") */
65     if (*scanner->curptr == '<' && *(scanner->curptr+1) == '?') {
66 	pj_scan_advance_n(scanner, 2, PJ_FALSE);
67 	for (;;) {
68 	    pj_str_t dummy;
69 	    pj_scan_get_until_ch(scanner, '?', &dummy);
70 	    if (*scanner->curptr=='?' && *(scanner->curptr+1)=='>') {
71 		pj_scan_advance_n(scanner, 2, PJ_TRUE);
72 		break;
73 	    } else {
74 		pj_scan_advance_n(scanner, 1, PJ_FALSE);
75 	    }
76 	}
77 	return xml_parse_node(pool, scanner);
78     }
79 
80     /* Handle comments construct (i.e. "<!") */
81     if (pj_scan_strcmp(scanner, "<!", 2) == 0) {
82 	pj_scan_advance_n(scanner, 2, PJ_FALSE);
83 	for (;;) {
84 	    pj_str_t dummy;
85 	    pj_scan_get_until_ch(scanner, '>', &dummy);
86 	    if (pj_scan_strcmp(scanner, ">", 1) == 0) {
87 		pj_scan_advance_n(scanner, 1, PJ_TRUE);
88 		break;
89 	    } else {
90 		pj_scan_advance_n(scanner, 1, PJ_FALSE);
91 	    }
92 	}
93 	return xml_parse_node(pool, scanner);
94     }
95 
96     /* Alloc node. */
97     node = alloc_node(pool);
98 
99     /* Get '<' */
100     pj_scan_get_char(scanner);
101 
102     /* Get node name. */
103     pj_scan_get_until_chr( scanner, " />\t\r\n", &node->name);
104 
105     /* Get attributes. */
106     while (*scanner->curptr != '>' && *scanner->curptr != '/') {
107 	pj_xml_attr *attr = alloc_attr(pool);
108 
109 	pj_scan_get_until_chr( scanner, "=> \t\r\n", &attr->name);
110 	if (*scanner->curptr == '=') {
111 	    pj_scan_get_char( scanner );
112             pj_scan_get_quotes(scanner, "\"'", "\"'", 2, &attr->value);
113 	    /* remove quote characters */
114 	    ++attr->value.ptr;
115 	    attr->value.slen -= 2;
116 	}
117 
118 	pj_list_push_back( &node->attr_head, attr );
119     }
120 
121     if (*scanner->curptr == '/') {
122 	pj_scan_get_char(scanner);
123 	if (pj_scan_get_char(scanner) != '>')
124 	    on_syntax_error(scanner);
125 	return node;
126     }
127 
128     /* Enclosing bracket. */
129     if (pj_scan_get_char(scanner) != '>')
130 	on_syntax_error(scanner);
131 
132     /* Sub nodes. */
133     while (*scanner->curptr == '<' && *(scanner->curptr+1) != '/'
134 				   && *(scanner->curptr+1) != '!')
135     {
136 	pj_xml_node *sub_node = xml_parse_node(pool, scanner);
137 	pj_list_push_back( &node->node_head, sub_node );
138     }
139 
140     /* Content. */
141     if (!pj_scan_is_eof(scanner) && *scanner->curptr != '<') {
142 	pj_scan_get_until_ch(scanner, '<', &node->content);
143     }
144 
145     /* CDATA content. */
146     if (*scanner->curptr == '<' && *(scanner->curptr+1) == '!' &&
147 	pj_scan_strcmp(scanner, "<![CDATA[", 9) == 0)
148     {
149 	pj_scan_advance_n(scanner, 9, PJ_FALSE);
150 	pj_scan_get_until_ch(scanner, ']', &node->content);
151 	while (pj_scan_strcmp(scanner, "]]>", 3)) {
152 	    pj_str_t dummy;
153 	    pj_scan_get_until_ch(scanner, ']', &dummy);
154 	}
155 	node->content.slen = scanner->curptr - node->content.ptr;
156 	pj_scan_advance_n(scanner, 3, PJ_TRUE);
157     }
158 
159     /* Enclosing node. */
160     if (pj_scan_get_char(scanner) != '<' || pj_scan_get_char(scanner) != '/')
161 	on_syntax_error(scanner);
162 
163     pj_scan_get_until_chr(scanner, " \t>", &end_name);
164 
165     /* Compare name. */
166     if (pj_stricmp(&node->name, &end_name) != 0)
167 	on_syntax_error(scanner);
168 
169     /* Enclosing '>' */
170     if (pj_scan_get_char(scanner) != '>')
171 	on_syntax_error(scanner);
172 
173     return node;
174 }
175 
pj_xml_parse(pj_pool_t * pool,char * msg,pj_size_t len)176 PJ_DEF(pj_xml_node*) pj_xml_parse( pj_pool_t *pool, char *msg, pj_size_t len)
177 {
178     pj_xml_node *node = NULL;
179     pj_scanner scanner;
180     PJ_USE_EXCEPTION;
181 
182     if (!msg || !len || !pool)
183 	return NULL;
184 
185     pj_scan_init( &scanner, msg, len,
186 		  PJ_SCAN_AUTOSKIP_WS|PJ_SCAN_AUTOSKIP_NEWLINE,
187 		  &on_syntax_error);
188     PJ_TRY {
189 	node =  xml_parse_node(pool, &scanner);
190     }
191     PJ_CATCH_ANY {
192 	PJ_LOG(4,(THIS_FILE, "Syntax error parsing XML in line %d column %d",
193 		  scanner.line, pj_scan_get_col(&scanner)));
194     }
195     PJ_END;
196     pj_scan_fini( &scanner );
197     return node;
198 }
199 
200 /* This is a recursive function. */
xml_print_node(const pj_xml_node * node,int indent,char * buf,pj_size_t len)201 static int xml_print_node( const pj_xml_node *node, int indent,
202 			   char *buf, pj_size_t len )
203 {
204     int i;
205     char *p = buf;
206     pj_xml_attr *attr;
207     pj_xml_node *sub_node;
208 
209 #define SIZE_LEFT()	((int)(len - (p-buf)))
210 
211     PJ_CHECK_STACK();
212 
213     /* Print name. */
214     if (SIZE_LEFT() < node->name.slen + indent + 5)
215 	return -1;
216     for (i=0; i<indent; ++i)
217 	*p++ = ' ';
218     *p++ = '<';
219     pj_memcpy(p, node->name.ptr, node->name.slen);
220     p += node->name.slen;
221 
222     /* Print attributes. */
223     attr = node->attr_head.next;
224     while (attr != &node->attr_head) {
225 
226 	if (SIZE_LEFT() < attr->name.slen + attr->value.slen + 4)
227 	    return -1;
228 
229 	*p++ = ' ';
230 
231 	/* Attribute name. */
232 	pj_memcpy(p, attr->name.ptr, attr->name.slen);
233 	p += attr->name.slen;
234 
235 	/* Attribute value. */
236 	if (attr->value.slen) {
237 	    *p++ = '=';
238 	    *p++ = '"';
239 	    pj_memcpy(p, attr->value.ptr, attr->value.slen);
240 	    p += attr->value.slen;
241 	    *p++ = '"';
242 	}
243 
244 	attr = attr->next;
245     }
246 
247     /* Check for empty node. */
248     if (node->content.slen==0 &&
249 	node->node_head.next==(pj_xml_node*)&node->node_head)
250     {
251         if (SIZE_LEFT() < 3) return -1;
252 	*p++ = ' ';
253 	*p++ = '/';
254 	*p++ = '>';
255 	return (int)(p-buf);
256     }
257 
258     /* Enclosing '>' */
259     if (SIZE_LEFT() < 1) return -1;
260     *p++ = '>';
261 
262     /* Print sub nodes. */
263     sub_node = node->node_head.next;
264     while (sub_node != (pj_xml_node*)&node->node_head) {
265 	int printed;
266 
267 	if (SIZE_LEFT() < indent + 3)
268 	    return -1;
269 	//*p++ = '\r';
270 	*p++ = '\n';
271 
272 	printed = xml_print_node(sub_node, indent + 1, p, SIZE_LEFT());
273 	if (printed < 0)
274 	    return -1;
275 
276 	p += printed;
277 	sub_node = sub_node->next;
278     }
279 
280     /* Content. */
281     if (node->content.slen) {
282 	if (SIZE_LEFT() < node->content.slen) return -1;
283 	pj_memcpy(p, node->content.ptr, node->content.slen);
284 	p += node->content.slen;
285     }
286 
287     /* Enclosing node. */
288     if (node->node_head.next != (pj_xml_node*)&node->node_head) {
289 	if (SIZE_LEFT() < node->name.slen + 5 + indent)
290 	    return -1;
291 	//*p++ = '\r';
292 	*p++ = '\n';
293 	for (i=0; i<indent; ++i)
294 	    *p++ = ' ';
295     } else {
296 	if (SIZE_LEFT() < node->name.slen + 3)
297 	    return -1;
298     }
299     *p++ = '<';
300     *p++ = '/';
301     pj_memcpy(p, node->name.ptr, node->name.slen);
302     p += node->name.slen;
303     *p++ = '>';
304 
305 #undef SIZE_LEFT
306 
307     return (int)(p-buf);
308 }
309 
pj_xml_print(const pj_xml_node * node,char * buf,pj_size_t len,pj_bool_t include_prolog)310 PJ_DEF(int) pj_xml_print(const pj_xml_node *node, char *buf, pj_size_t len,
311 			 pj_bool_t include_prolog)
312 {
313     int prolog_len = 0;
314     int printed;
315 
316     if (!node || !buf || !len)
317 	return 0;
318 
319     if (include_prolog) {
320 	pj_str_t prolog = {"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39};
321 	if ((int)len < prolog.slen)
322 	    return -1;
323 	pj_memcpy(buf, prolog.ptr, prolog.slen);
324 	prolog_len = (int)prolog.slen;
325     }
326 
327     printed = xml_print_node(node, 0, buf+prolog_len, len-prolog_len) + prolog_len;
328     if (printed > 0 && len-printed >= 1) {
329 	buf[printed++] = '\n';
330     }
331     return printed;
332 }
333 
pj_xml_node_new(pj_pool_t * pool,const pj_str_t * name)334 PJ_DEF(pj_xml_node*) pj_xml_node_new(pj_pool_t *pool, const pj_str_t *name)
335 {
336     pj_xml_node *node = alloc_node(pool);
337     pj_strdup(pool, &node->name, name);
338     return node;
339 }
340 
pj_xml_attr_new(pj_pool_t * pool,const pj_str_t * name,const pj_str_t * value)341 PJ_DEF(pj_xml_attr*) pj_xml_attr_new( pj_pool_t *pool, const pj_str_t *name,
342 				      const pj_str_t *value)
343 {
344     pj_xml_attr *attr = alloc_attr(pool);
345     pj_strdup( pool, &attr->name, name);
346     pj_strdup( pool, &attr->value, value);
347     return attr;
348 }
349 
pj_xml_add_node(pj_xml_node * parent,pj_xml_node * node)350 PJ_DEF(void) pj_xml_add_node( pj_xml_node *parent, pj_xml_node *node )
351 {
352     pj_list_push_back(&parent->node_head, node);
353 }
354 
pj_xml_add_attr(pj_xml_node * node,pj_xml_attr * attr)355 PJ_DEF(void) pj_xml_add_attr( pj_xml_node *node, pj_xml_attr *attr )
356 {
357     pj_list_push_back(&node->attr_head, attr);
358 }
359 
/*
 * Return the first direct child of parent whose name equals name
 * (case-insensitive), or NULL if there is none.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node(const pj_xml_node *parent,
                                      const pj_str_t *name)
{
    const pj_xml_node *child;

    PJ_CHECK_STACK();

    for (child = parent->node_head.next;
         child != (void*)&parent->node_head;
         child = child->next)
    {
        if (pj_stricmp(&child->name, name) == 0)
            return (pj_xml_node*)child;
    }

    return NULL;
}
374 
/*
 * Depth-first search below parent for a node whose name equals name
 * (case-insensitive). Each child is tested before its own descendants
 * are searched. Returns the first match, or NULL if there is none.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node_rec(const pj_xml_node *parent,
                                          const pj_str_t *name)
{
    const pj_xml_node *child;

    PJ_CHECK_STACK();

    for (child = parent->node_head.next;
         child != (void*)&parent->node_head;
         child = child->next)
    {
        pj_xml_node *nested;

        if (pj_stricmp(&child->name, name) == 0)
            return (pj_xml_node*)child;

        nested = pj_xml_find_node_rec(child, name);
        if (nested != NULL)
            return (pj_xml_node*)nested;
    }

    return NULL;
}
393 
pj_xml_find_next_node(const pj_xml_node * parent,const pj_xml_node * node,const pj_str_t * name)394 PJ_DEF(pj_xml_node*) pj_xml_find_next_node( const pj_xml_node *parent,
395 					    const pj_xml_node *node,
396 					    const pj_str_t *name)
397 {
398     PJ_CHECK_STACK();
399 
400     node = node->next;
401     while (node != (void*)&parent->node_head) {
402 	if (pj_stricmp(&node->name, name) == 0)
403 	    return (pj_xml_node*)node;
404 	node = node->next;
405     }
406     return NULL;
407 }
408 
409 
pj_xml_find_attr(const pj_xml_node * node,const pj_str_t * name,const pj_str_t * value)410 PJ_DEF(pj_xml_attr*) pj_xml_find_attr( const pj_xml_node *node,
411 				       const pj_str_t *name,
412 				       const pj_str_t *value)
413 {
414     const pj_xml_attr *attr = node->attr_head.next;
415     while (attr != (void*)&node->attr_head) {
416 	if (pj_stricmp(&attr->name, name)==0) {
417 	    if (value) {
418 		if (pj_stricmp(&attr->value, value)==0)
419 		    return (pj_xml_attr*)attr;
420 	    } else {
421 		return (pj_xml_attr*)attr;
422 	    }
423 	}
424 	attr = attr->next;
425     }
426     return NULL;
427 }
428 
429 
430 
/*
 * Search the direct children of parent for the first node that satisfies
 * the given criteria:
 *   - name  (optional): the node name must equal it, case-insensitively;
 *   - match (optional): the callback, invoked with the node and data,
 *     must return non-zero.
 * At least one of name and match must be supplied, otherwise NULL is
 * returned. Returns the matching node, or NULL if none is found.
 */
PJ_DEF(pj_xml_node*) pj_xml_find( const pj_xml_node *parent,
                                  const pj_str_t *name,
                                  const void *data,
                                  pj_bool_t (*match)(const pj_xml_node *,
                                                     const void*))
{
    const pj_xml_node *child;

    if (!name && !match)
        return NULL;

    for (child = (const pj_xml_node *)parent->node_head.next;
         child != (const pj_xml_node*) &parent->node_head;
         child = child->next)
    {
        /* Filter on name first, when one was requested. */
        if (name && pj_stricmp(&child->name, name) != 0)
            continue;

        /* With no callback the name match alone is sufficient. */
        if (!match || match(child, data))
            return (pj_xml_node*)child;
    }

    return NULL;
}
460 
/*
 * Recursive variant of pj_xml_find(): depth-first search of everything
 * below parent for the first node that satisfies the optional name
 * (case-insensitive) and the optional match callback. Each child is
 * tested before its own descendants are searched.
 *
 * At least one of name and match must be supplied, otherwise NULL is
 * returned. Returns the matching node, or NULL if none is found.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_rec( const pj_xml_node *parent,
                                      const pj_str_t *name,
                                      const void *data,
                                      pj_bool_t (*match)(const pj_xml_node*,
                                                         const void*))
{
    const pj_xml_node *child;

    if (!name && !match)
        return NULL;

    for (child = (const pj_xml_node *)parent->node_head.next;
         child != (const pj_xml_node*) &parent->node_head;
         child = child->next)
    {
        pj_xml_node *nested;

        /* The callback is consulted only after the name filter passes. */
        if (!name || pj_stricmp(&child->name, name) == 0) {
            if (!match || match(child, data))
                return (pj_xml_node*)child;
        }

        nested = pj_xml_find_rec(child, name, data, match);
        if (nested)
            return nested;
    }

    return NULL;
}
498 
/*
 * Deep-copy the tree rooted at rhs into memory taken from pool: the node
 * name and content, every attribute (name and value), and, recursively,
 * every child node, all in their original order. Returns the new root.
 */
PJ_DEF(pj_xml_node*) pj_xml_clone( pj_pool_t *pool, const pj_xml_node *rhs)
{
    const pj_xml_attr *src_attr;
    const pj_xml_node *src_child;
    pj_xml_node *copy = alloc_node(pool);

    pj_strdup(pool, &copy->name, &rhs->name);
    pj_strdup(pool, &copy->content, &rhs->content);

    /* Duplicate the attribute list. */
    for (src_attr = rhs->attr_head.next;
         src_attr != &rhs->attr_head;
         src_attr = src_attr->next)
    {
        pj_xml_attr *attr_copy = alloc_attr(pool);

        pj_strdup(pool, &attr_copy->name, &src_attr->name);
        pj_strdup(pool, &attr_copy->value, &src_attr->value);
        pj_list_push_back(&copy->attr_head, attr_copy);
    }

    /* Duplicate each child subtree. */
    for (src_child = rhs->node_head.next;
         src_child != (pj_xml_node*) &rhs->node_head;
         src_child = src_child->next)
    {
        pj_xml_node *child_copy = pj_xml_clone(pool, src_child);

        pj_list_push_back(&copy->node_head, child_copy);
    }

    return copy;
}
538