1 /* $Id$ */
2 /*
3 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 #include <pjlib-util/xml.h>
21 #include <pjlib-util/scanner.h>
22 #include <pj/except.h>
23 #include <pj/pool.h>
24 #include <pj/string.h>
25 #include <pj/log.h>
26 #include <pj/os.h>
27
/* Exception id raised with PJ_THROW() by on_syntax_error(). */
28 #define EX_SYNTAX_ERROR 12
/* File tag passed as the first item of the PJ_LOG() call in this file. */
29 #define THIS_FILE "xml.c"
30
on_syntax_error(struct pj_scanner * scanner)31 static void on_syntax_error(struct pj_scanner *scanner)
32 {
33 PJ_UNUSED_ARG(scanner);
34 PJ_THROW(EX_SYNTAX_ERROR);
35 }
36
alloc_node(pj_pool_t * pool)37 static pj_xml_node *alloc_node( pj_pool_t *pool )
38 {
39 pj_xml_node *node;
40
41 node = PJ_POOL_ZALLOC_T(pool, pj_xml_node);
42 pj_list_init( &node->attr_head );
43 pj_list_init( &node->node_head );
44
45 return node;
46 }
47
alloc_attr(pj_pool_t * pool)48 static pj_xml_attr *alloc_attr( pj_pool_t *pool )
49 {
50 return PJ_POOL_ZALLOC_T(pool, pj_xml_attr);
51 }
52
53 /* This is a recursive function! */
xml_parse_node(pj_pool_t * pool,pj_scanner * scanner)54 static pj_xml_node *xml_parse_node( pj_pool_t *pool, pj_scanner *scanner)
55 {
56 pj_xml_node *node;
57 pj_str_t end_name;
58
59 PJ_CHECK_STACK();
60
61 if (*scanner->curptr != '<')
62 on_syntax_error(scanner);
63
64 /* Handle Processing Instructino (PI) construct (i.e. "<?") */
65 if (*scanner->curptr == '<' && *(scanner->curptr+1) == '?') {
66 pj_scan_advance_n(scanner, 2, PJ_FALSE);
67 for (;;) {
68 pj_str_t dummy;
69 pj_scan_get_until_ch(scanner, '?', &dummy);
70 if (*scanner->curptr=='?' && *(scanner->curptr+1)=='>') {
71 pj_scan_advance_n(scanner, 2, PJ_TRUE);
72 break;
73 } else {
74 pj_scan_advance_n(scanner, 1, PJ_FALSE);
75 }
76 }
77 return xml_parse_node(pool, scanner);
78 }
79
80 /* Handle comments construct (i.e. "<!") */
81 if (pj_scan_strcmp(scanner, "<!", 2) == 0) {
82 pj_scan_advance_n(scanner, 2, PJ_FALSE);
83 for (;;) {
84 pj_str_t dummy;
85 pj_scan_get_until_ch(scanner, '>', &dummy);
86 if (pj_scan_strcmp(scanner, ">", 1) == 0) {
87 pj_scan_advance_n(scanner, 1, PJ_TRUE);
88 break;
89 } else {
90 pj_scan_advance_n(scanner, 1, PJ_FALSE);
91 }
92 }
93 return xml_parse_node(pool, scanner);
94 }
95
96 /* Alloc node. */
97 node = alloc_node(pool);
98
99 /* Get '<' */
100 pj_scan_get_char(scanner);
101
102 /* Get node name. */
103 pj_scan_get_until_chr( scanner, " />\t\r\n", &node->name);
104
105 /* Get attributes. */
106 while (*scanner->curptr != '>' && *scanner->curptr != '/') {
107 pj_xml_attr *attr = alloc_attr(pool);
108
109 pj_scan_get_until_chr( scanner, "=> \t\r\n", &attr->name);
110 if (*scanner->curptr == '=') {
111 pj_scan_get_char( scanner );
112 pj_scan_get_quotes(scanner, "\"'", "\"'", 2, &attr->value);
113 /* remove quote characters */
114 ++attr->value.ptr;
115 attr->value.slen -= 2;
116 }
117
118 pj_list_push_back( &node->attr_head, attr );
119 }
120
121 if (*scanner->curptr == '/') {
122 pj_scan_get_char(scanner);
123 if (pj_scan_get_char(scanner) != '>')
124 on_syntax_error(scanner);
125 return node;
126 }
127
128 /* Enclosing bracket. */
129 if (pj_scan_get_char(scanner) != '>')
130 on_syntax_error(scanner);
131
132 /* Sub nodes. */
133 while (*scanner->curptr == '<' && *(scanner->curptr+1) != '/'
134 && *(scanner->curptr+1) != '!')
135 {
136 pj_xml_node *sub_node = xml_parse_node(pool, scanner);
137 pj_list_push_back( &node->node_head, sub_node );
138 }
139
140 /* Content. */
141 if (!pj_scan_is_eof(scanner) && *scanner->curptr != '<') {
142 pj_scan_get_until_ch(scanner, '<', &node->content);
143 }
144
145 /* CDATA content. */
146 if (*scanner->curptr == '<' && *(scanner->curptr+1) == '!' &&
147 pj_scan_strcmp(scanner, "<![CDATA[", 9) == 0)
148 {
149 pj_scan_advance_n(scanner, 9, PJ_FALSE);
150 pj_scan_get_until_ch(scanner, ']', &node->content);
151 while (pj_scan_strcmp(scanner, "]]>", 3)) {
152 pj_str_t dummy;
153 pj_scan_get_until_ch(scanner, ']', &dummy);
154 }
155 node->content.slen = scanner->curptr - node->content.ptr;
156 pj_scan_advance_n(scanner, 3, PJ_TRUE);
157 }
158
159 /* Enclosing node. */
160 if (pj_scan_get_char(scanner) != '<' || pj_scan_get_char(scanner) != '/')
161 on_syntax_error(scanner);
162
163 pj_scan_get_until_chr(scanner, " \t>", &end_name);
164
165 /* Compare name. */
166 if (pj_stricmp(&node->name, &end_name) != 0)
167 on_syntax_error(scanner);
168
169 /* Enclosing '>' */
170 if (pj_scan_get_char(scanner) != '>')
171 on_syntax_error(scanner);
172
173 return node;
174 }
175
pj_xml_parse(pj_pool_t * pool,char * msg,pj_size_t len)176 PJ_DEF(pj_xml_node*) pj_xml_parse( pj_pool_t *pool, char *msg, pj_size_t len)
177 {
178 pj_xml_node *node = NULL;
179 pj_scanner scanner;
180 PJ_USE_EXCEPTION;
181
182 if (!msg || !len || !pool)
183 return NULL;
184
185 pj_scan_init( &scanner, msg, len,
186 PJ_SCAN_AUTOSKIP_WS|PJ_SCAN_AUTOSKIP_NEWLINE,
187 &on_syntax_error);
188 PJ_TRY {
189 node = xml_parse_node(pool, &scanner);
190 }
191 PJ_CATCH_ANY {
192 PJ_LOG(4,(THIS_FILE, "Syntax error parsing XML in line %d column %d",
193 scanner.line, pj_scan_get_col(&scanner)));
194 }
195 PJ_END;
196 pj_scan_fini( &scanner );
197 return node;
198 }
199
200 /* This is a recursive function. */
xml_print_node(const pj_xml_node * node,int indent,char * buf,pj_size_t len)201 static int xml_print_node( const pj_xml_node *node, int indent,
202 char *buf, pj_size_t len )
203 {
204 int i;
205 char *p = buf;
206 pj_xml_attr *attr;
207 pj_xml_node *sub_node;
208
209 #define SIZE_LEFT() ((int)(len - (p-buf)))
210
211 PJ_CHECK_STACK();
212
213 /* Print name. */
214 if (SIZE_LEFT() < node->name.slen + indent + 5)
215 return -1;
216 for (i=0; i<indent; ++i)
217 *p++ = ' ';
218 *p++ = '<';
219 pj_memcpy(p, node->name.ptr, node->name.slen);
220 p += node->name.slen;
221
222 /* Print attributes. */
223 attr = node->attr_head.next;
224 while (attr != &node->attr_head) {
225
226 if (SIZE_LEFT() < attr->name.slen + attr->value.slen + 4)
227 return -1;
228
229 *p++ = ' ';
230
231 /* Attribute name. */
232 pj_memcpy(p, attr->name.ptr, attr->name.slen);
233 p += attr->name.slen;
234
235 /* Attribute value. */
236 if (attr->value.slen) {
237 *p++ = '=';
238 *p++ = '"';
239 pj_memcpy(p, attr->value.ptr, attr->value.slen);
240 p += attr->value.slen;
241 *p++ = '"';
242 }
243
244 attr = attr->next;
245 }
246
247 /* Check for empty node. */
248 if (node->content.slen==0 &&
249 node->node_head.next==(pj_xml_node*)&node->node_head)
250 {
251 if (SIZE_LEFT() < 3) return -1;
252 *p++ = ' ';
253 *p++ = '/';
254 *p++ = '>';
255 return (int)(p-buf);
256 }
257
258 /* Enclosing '>' */
259 if (SIZE_LEFT() < 1) return -1;
260 *p++ = '>';
261
262 /* Print sub nodes. */
263 sub_node = node->node_head.next;
264 while (sub_node != (pj_xml_node*)&node->node_head) {
265 int printed;
266
267 if (SIZE_LEFT() < indent + 3)
268 return -1;
269 //*p++ = '\r';
270 *p++ = '\n';
271
272 printed = xml_print_node(sub_node, indent + 1, p, SIZE_LEFT());
273 if (printed < 0)
274 return -1;
275
276 p += printed;
277 sub_node = sub_node->next;
278 }
279
280 /* Content. */
281 if (node->content.slen) {
282 if (SIZE_LEFT() < node->content.slen) return -1;
283 pj_memcpy(p, node->content.ptr, node->content.slen);
284 p += node->content.slen;
285 }
286
287 /* Enclosing node. */
288 if (node->node_head.next != (pj_xml_node*)&node->node_head) {
289 if (SIZE_LEFT() < node->name.slen + 5 + indent)
290 return -1;
291 //*p++ = '\r';
292 *p++ = '\n';
293 for (i=0; i<indent; ++i)
294 *p++ = ' ';
295 } else {
296 if (SIZE_LEFT() < node->name.slen + 3)
297 return -1;
298 }
299 *p++ = '<';
300 *p++ = '/';
301 pj_memcpy(p, node->name.ptr, node->name.slen);
302 p += node->name.slen;
303 *p++ = '>';
304
305 #undef SIZE_LEFT
306
307 return (int)(p-buf);
308 }
309
pj_xml_print(const pj_xml_node * node,char * buf,pj_size_t len,pj_bool_t include_prolog)310 PJ_DEF(int) pj_xml_print(const pj_xml_node *node, char *buf, pj_size_t len,
311 pj_bool_t include_prolog)
312 {
313 int prolog_len = 0;
314 int printed;
315
316 if (!node || !buf || !len)
317 return 0;
318
319 if (include_prolog) {
320 pj_str_t prolog = {"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39};
321 if ((int)len < prolog.slen)
322 return -1;
323 pj_memcpy(buf, prolog.ptr, prolog.slen);
324 prolog_len = (int)prolog.slen;
325 }
326
327 printed = xml_print_node(node, 0, buf+prolog_len, len-prolog_len) + prolog_len;
328 if (printed > 0 && len-printed >= 1) {
329 buf[printed++] = '\n';
330 }
331 return printed;
332 }
333
pj_xml_node_new(pj_pool_t * pool,const pj_str_t * name)334 PJ_DEF(pj_xml_node*) pj_xml_node_new(pj_pool_t *pool, const pj_str_t *name)
335 {
336 pj_xml_node *node = alloc_node(pool);
337 pj_strdup(pool, &node->name, name);
338 return node;
339 }
340
pj_xml_attr_new(pj_pool_t * pool,const pj_str_t * name,const pj_str_t * value)341 PJ_DEF(pj_xml_attr*) pj_xml_attr_new( pj_pool_t *pool, const pj_str_t *name,
342 const pj_str_t *value)
343 {
344 pj_xml_attr *attr = alloc_attr(pool);
345 pj_strdup( pool, &attr->name, name);
346 pj_strdup( pool, &attr->value, value);
347 return attr;
348 }
349
pj_xml_add_node(pj_xml_node * parent,pj_xml_node * node)350 PJ_DEF(void) pj_xml_add_node( pj_xml_node *parent, pj_xml_node *node )
351 {
352 pj_list_push_back(&parent->node_head, node);
353 }
354
pj_xml_add_attr(pj_xml_node * node,pj_xml_attr * attr)355 PJ_DEF(void) pj_xml_add_attr( pj_xml_node *node, pj_xml_attr *attr )
356 {
357 pj_list_push_back(&node->attr_head, attr);
358 }
359
/*
 * Find the first direct child of parent whose name equals name
 * (compared with pj_stricmp()). Returns NULL when there is none.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node(const pj_xml_node *parent,
                                      const pj_str_t *name)
{
    const pj_xml_node *child = parent->node_head.next;

    PJ_CHECK_STACK();

    for (; child != (void*)&parent->node_head; child = child->next) {
        if (pj_stricmp(&child->name, name) == 0)
            return (pj_xml_node*)child;
    }

    return NULL;
}
374
/*
 * Depth-first search below parent for a node whose name equals name
 * (compared with pj_stricmp()). Each child is tested before its own
 * descendants are searched. Returns NULL when no node matches.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node_rec(const pj_xml_node *parent,
                                          const pj_str_t *name)
{
    const pj_xml_node *child = parent->node_head.next;

    PJ_CHECK_STACK();

    for (; child != (void*)&parent->node_head; child = child->next) {
        pj_xml_node *nested;

        if (pj_stricmp(&child->name, name) == 0)
            return (pj_xml_node*)child;

        nested = pj_xml_find_node_rec(child, name);
        if (nested)
            return (pj_xml_node*)nested;
    }

    return NULL;
}
393
/*
 * Continue a sibling search: starting with the node that follows node in
 * parent's child list, return the first one whose name equals name
 * (compared with pj_stricmp()), or NULL when the end of the list is reached.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_next_node(const pj_xml_node *parent,
                                           const pj_xml_node *node,
                                           const pj_str_t *name)
{
    const pj_xml_node *cur;

    PJ_CHECK_STACK();

    for (cur = node->next; cur != (void*)&parent->node_head; cur = cur->next) {
        if (pj_stricmp(&cur->name, name) == 0)
            return (pj_xml_node*)cur;
    }

    return NULL;
}
408
409
pj_xml_find_attr(const pj_xml_node * node,const pj_str_t * name,const pj_str_t * value)410 PJ_DEF(pj_xml_attr*) pj_xml_find_attr( const pj_xml_node *node,
411 const pj_str_t *name,
412 const pj_str_t *value)
413 {
414 const pj_xml_attr *attr = node->attr_head.next;
415 while (attr != (void*)&node->attr_head) {
416 if (pj_stricmp(&attr->name, name)==0) {
417 if (value) {
418 if (pj_stricmp(&attr->value, value)==0)
419 return (pj_xml_attr*)attr;
420 } else {
421 return (pj_xml_attr*)attr;
422 }
423 }
424 attr = attr->next;
425 }
426 return NULL;
427 }
428
429
430
/*
 * Find the first direct child of parent that satisfies the given criteria.
 *
 * name   Optional node name to match (compared with pj_stricmp()).
 * data   Opaque value passed through to match.
 * match  Optional predicate; a candidate is accepted when it returns
 *        non-zero.
 *
 * At least one of name and match must be given, otherwise NULL is returned.
 * Returns the matching node, or NULL when there is none.
 */
PJ_DEF(pj_xml_node*) pj_xml_find(const pj_xml_node *parent,
                                 const pj_str_t *name,
                                 const void *data,
                                 pj_bool_t (*match)(const pj_xml_node *,
                                                    const void*))
{
    const pj_xml_node *cur = (const pj_xml_node *)parent->node_head.next;

    if (!name && !match)
        return NULL;

    for (; cur != (const pj_xml_node*) &parent->node_head; cur = cur->next) {
        /* Name filter first, when one was supplied. */
        if (name && pj_stricmp(&cur->name, name) != 0)
            continue;

        /* Without a predicate the name match alone is enough. */
        if (!match || match(cur, data))
            return (pj_xml_node*)cur;
    }

    return NULL;
}
460
/*
 * Depth-first variant of pj_xml_find(): every child of parent is tested
 * against the criteria and, when it does not qualify, its own subtree is
 * searched before moving on to the next sibling.
 *
 * name   Optional node name to match (compared with pj_stricmp()).
 * data   Opaque value passed through to match.
 * match  Optional predicate; a candidate is accepted when it returns
 *        non-zero.
 *
 * At least one of name and match must be given, otherwise NULL is returned.
 * Returns the matching node, or NULL when there is none.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_rec(const pj_xml_node *parent,
                                     const pj_str_t *name,
                                     const void *data,
                                     pj_bool_t (*match)(const pj_xml_node*,
                                                        const void*))
{
    const pj_xml_node *cur = (const pj_xml_node *)parent->node_head.next;

    if (!name && !match)
        return NULL;

    for (; cur != (const pj_xml_node*) &parent->node_head; cur = cur->next) {
        pj_xml_node *nested;

        /* The node itself qualifies when the name (if any) matches and
         * the predicate (if any) accepts it.
         */
        if ((!name || pj_stricmp(&cur->name, name) == 0) &&
            (!match || match(cur, data)))
        {
            return (pj_xml_node*)cur;
        }

        nested = pj_xml_find_rec(cur, name, data, match);
        if (nested)
            return nested;
    }

    return NULL;
}
498
/*
 * Deep-copy the tree rooted at rhs into pool.
 *
 * The name and content of rhs, all of its attributes and, recursively, all
 * of its child nodes are duplicated; strings are copied with pj_strdup().
 * Returns the new root node.
 */
PJ_DEF(pj_xml_node*) pj_xml_clone(pj_pool_t *pool, const pj_xml_node *rhs)
{
    pj_xml_node *copy;
    const pj_xml_attr *src_attr;
    const pj_xml_node *src_child;

    copy = alloc_node(pool);

    pj_strdup(pool, &copy->name, &rhs->name);
    pj_strdup(pool, &copy->content, &rhs->content);

    /* Duplicate the attributes in their original order. */
    for (src_attr = rhs->attr_head.next;
         src_attr != &rhs->attr_head;
         src_attr = src_attr->next)
    {
        pj_xml_attr *dup_attr = alloc_attr(pool);

        pj_strdup(pool, &dup_attr->name, &src_attr->name);
        pj_strdup(pool, &dup_attr->value, &src_attr->value);

        pj_list_push_back(&copy->attr_head, dup_attr);
    }

    /* Duplicate the child nodes recursively, in their original order. */
    for (src_child = rhs->node_head.next;
         src_child != (pj_xml_node*) &rhs->node_head;
         src_child = src_child->next)
    {
        pj_xml_node *dup_child = pj_xml_clone(pool, src_child);

        pj_list_push_back(&copy->node_head, dup_child);
    }

    return copy;
}
538