1 /**
2  * @file xml.c
3  * @author Radek Krejci <rkrejci@cesnet.cz>
4  * @brief XML parser implementation for libyang
5  *
6  * Copyright (c) 2015 CESNET, z.s.p.o.
7  *
8  * This source code is licensed under BSD 3-Clause License (the "License").
9  * You may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     https://opensource.org/licenses/BSD-3-Clause
13  */
14 
15 #include <assert.h>
16 #include <errno.h>
17 #include <ctype.h>
18 #include <stdint.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <pthread.h>
24 #include <sys/stat.h>
25 #include <sys/mman.h>
26 #include <fcntl.h>
27 
28 #include "common.h"
29 #include "hash_table.h"
30 #include "printer.h"
31 #include "parser.h"
32 #include "tree_schema.h"
33 #include "xml_internal.h"
34 #include "xpath.h"
35 
/*
 * Advance the pointer variable p past every leading character for which
 * is_xmlws() is true. p is modified in place and is re-evaluated on each
 * iteration, so pass a plain pointer lvalue without side effects.
 * Note: the expansion is a bare while-loop, not a do { } while (0) wrapper.
 */
#define ign_xmlws(p)                                                    \
    while (is_xmlws(*p)) {                                              \
        p++;                                                            \
    }
40 
41 static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr);
42 
43 API const struct lyxml_ns *
lyxml_get_ns(const struct lyxml_elem * elem,const char * prefix)44 lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix)
45 {
46     FUN_IN;
47 
48     struct lyxml_attr *attr;
49 
50     if (!elem) {
51         return NULL;
52     }
53 
54     for (attr = elem->attr; attr; attr = attr->next) {
55         if (attr->type != LYXML_ATTR_NS) {
56             continue;
57         }
58         if (!attr->name) {
59             if (!prefix) {
60                 /* default namespace found */
61                 if (!attr->value) {
62                     /* empty default namespace -> no default namespace */
63                     return NULL;
64                 }
65                 return (struct lyxml_ns *)attr;
66             }
67         } else if (prefix && !strcmp(attr->name, prefix)) {
68             /* prefix found */
69             return (struct lyxml_ns *)attr;
70         }
71     }
72 
73     /* go recursively */
74     return lyxml_get_ns(elem->parent, prefix);
75 }
76 
77 static void
lyxml_correct_attr_ns(struct ly_ctx * ctx,struct lyxml_attr * attr,struct lyxml_elem * attr_parent,int copy_ns)78 lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns)
79 {
80     const struct lyxml_ns *tmp_ns;
81     struct lyxml_elem *ns_root, *attr_root;
82 
83     if ((attr->type != LYXML_ATTR_NS) && attr->ns) {
84         /* find the root of attr */
85         for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent);
86 
87         /* find the root of attr NS */
88         for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent);
89 
90         /* attr NS is defined outside attr parent subtree */
91         if (ns_root != attr_root) {
92             if (copy_ns) {
93                 tmp_ns = attr->ns;
94                 /* we may have already copied the NS over? */
95                 attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix);
96 
97                 /* we haven't copied it over, copy it now */
98                 if (!attr->ns) {
99                     attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns);
100                 }
101             } else {
102                 attr->ns = NULL;
103             }
104         }
105     }
106 }
107 
108 static struct lyxml_attr *
lyxml_dup_attr(struct ly_ctx * ctx,struct lyxml_elem * parent,struct lyxml_attr * attr)109 lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
110 {
111     struct lyxml_attr *result, *a;
112 
113     if (!attr || !parent) {
114         return NULL;
115     }
116 
117     if (attr->type == LYXML_ATTR_NS) {
118         /* this is correct, despite that all attributes seems like a standard
119          * attributes (struct lyxml_attr), some of them can be namespace
120          * definitions (and in that case they are struct lyxml_ns).
121          */
122         result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns));
123     } else {
124         result = calloc(1, sizeof (struct lyxml_attr));
125     }
126     LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
127 
128     result->value = lydict_insert(ctx, attr->value, 0);
129     result->name = lydict_insert(ctx, attr->name, 0);
130     result->type = attr->type;
131 
132     /* set namespace in case of standard attributes */
133     if (result->type == LYXML_ATTR_STD && attr->ns) {
134         result->ns = attr->ns;
135         lyxml_correct_attr_ns(ctx, result, parent, 1);
136     }
137 
138     /* set parent pointer in case of namespace attribute */
139     if (result->type == LYXML_ATTR_NS) {
140         ((struct lyxml_ns *)result)->parent = parent;
141     }
142 
143     /* put attribute into the parent's attributes list */
144     if (parent->attr) {
145         /* go to the end of the list */
146         for (a = parent->attr; a->next; a = a->next);
147         /* and append new attribute */
148         a->next = result;
149     } else {
150         /* add the first attribute in the list */
151         parent->attr = result;
152     }
153 
154     return result;
155 }
156 
157 static void
lyxml_correct_content_ns(struct ly_ctx * ctx,struct lyxml_elem * elem,struct lyxml_elem * orig)158 lyxml_correct_content_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *orig)
159 {
160     const char *end, *cur_expr;
161     char *prefix;
162     uint16_t i;
163     size_t pref_len;
164     const struct lyxml_ns *ns;
165     struct lyxp_expr *exp;
166     enum int_log_opts prev_ilo;
167 
168     /* it may not be a valid XPath expression */
169     ly_ilo_change(NULL, ILO_IGNORE, &prev_ilo, NULL);
170     exp = lyxp_parse_expr(ctx, elem->content);
171     ly_ilo_restore(NULL, prev_ilo, NULL, 0);
172     if (!exp) {
173         goto cleanup;
174     }
175 
176     for (i = 0; i < exp->used; ++i) {
177         cur_expr = &exp->expr[exp->expr_pos[i]];
178 
179         if ((exp->tokens[i] == LYXP_TOKEN_NAMETEST) && (end = strnchr(cur_expr, ':', exp->tok_len[i]))) {
180             /* get the prefix */
181             pref_len = end - cur_expr;
182             prefix = strndup(cur_expr, pref_len);
183             if (!prefix) {
184                 LOGMEM(ctx);
185                 goto cleanup;
186             }
187             ns = lyxml_get_ns(elem, prefix);
188 
189             /* we already have the namespace */
190             if (ns) {
191                 free(prefix);
192                 continue;
193             }
194 
195             /* find the namespace in the original XML */
196             ns = lyxml_get_ns(orig, prefix);
197             free(prefix);
198 
199             /* copy the namespace over, if any */
200             if (ns && !lyxml_dup_attr(ctx, elem, (struct lyxml_attr *)ns)) {
201                 LOGINT(ctx);
202                 goto cleanup;
203             }
204         }
205     }
206 
207 cleanup:
208     lyxp_expr_free(exp);
209 }
210 
211 void
lyxml_correct_elem_ns(struct ly_ctx * ctx,struct lyxml_elem * elem,struct lyxml_elem * orig,int copy_ns,int correct_attrs)212 lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *orig, int copy_ns,
213                       int correct_attrs)
214 {
215     const struct lyxml_ns *tmp_ns;
216     struct lyxml_elem *elem_root, *ns_root, *tmp, *iter;
217     struct lyxml_attr *attr;
218 
219     /* find the root of elem */
220     for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent);
221 
222     LY_TREE_DFS_BEGIN(elem, tmp, iter) {
223         if (iter->ns) {
224             /* find the root of elem NS */
225             for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent);
226 
227             /* elem NS is defined outside elem subtree */
228             if (ns_root != elem_root) {
229                 if (copy_ns) {
230                     tmp_ns = iter->ns;
231                     /* we may have already copied the NS over? */
232                     iter->ns = lyxml_get_ns(iter, tmp_ns->prefix);
233 
234                     /* we haven't copied it over, copy it now */
235                     if (!iter->ns) {
236                         iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns);
237                     }
238                 } else {
239                     iter->ns = NULL;
240                 }
241             }
242         }
243         if (iter->content && iter->content[0] && copy_ns) {
244             lyxml_correct_content_ns(ctx, iter, orig);
245         }
246         if (correct_attrs) {
247             LY_TREE_FOR(iter->attr, attr) {
248                 lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns);
249             }
250         }
251         LY_TREE_DFS_END(elem, tmp, iter);
252     }
253 }
254 
255 struct lyxml_elem *
lyxml_dup_elem(struct ly_ctx * ctx,struct lyxml_elem * elem,struct lyxml_elem * parent,int recursive,int with_siblings)256 lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive, int with_siblings)
257 {
258     struct lyxml_elem *dup, *result = NULL;
259     struct lyxml_attr *attr;
260 
261     if (!elem) {
262         return NULL;
263     }
264 
265     LY_TREE_FOR(elem, elem) {
266         dup = calloc(1, sizeof *dup);
267         LY_CHECK_ERR_RETURN(!dup, LOGMEM(ctx), NULL);
268         dup->content = lydict_insert(ctx, elem->content, 0);
269         dup->name = lydict_insert(ctx, elem->name, 0);
270         dup->flags = elem->flags;
271         dup->prev = dup;
272 
273         if (parent) {
274             lyxml_add_child(ctx, parent, dup);
275         } else if (result) {
276             dup->prev = result->prev;
277             dup->prev->next = dup;
278             result->prev = dup;
279         }
280 
281         /* keep old namespace for now */
282         dup->ns = elem->ns;
283 
284         /* duplicate attributes */
285         for (attr = elem->attr; attr; attr = attr->next) {
286             lyxml_dup_attr(ctx, dup, attr);
287         }
288 
289         /* correct namespaces */
290         lyxml_correct_elem_ns(ctx, dup, elem, 1, 0);
291 
292         if (recursive) {
293             /* duplicate children */
294             lyxml_dup_elem(ctx, elem->child, dup, 1, 1);
295         }
296 
297         /* set result (first sibling) */
298         if (!result) {
299             result = dup;
300         }
301 
302         if (!with_siblings) {
303             break;
304         }
305     }
306 
307     return result;
308 }
309 
310 API struct lyxml_elem *
lyxml_dup(struct ly_ctx * ctx,struct lyxml_elem * root)311 lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root)
312 {
313     FUN_IN;
314 
315     return lyxml_dup_elem(ctx, root, NULL, 1, 0);
316 }
317 
318 void
lyxml_unlink_elem(struct ly_ctx * ctx,struct lyxml_elem * elem,int copy_ns)319 lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns)
320 {
321     struct lyxml_elem *parent, *first;
322 
323     if (!elem) {
324         return;
325     }
326 
327     /* store pointers to important nodes */
328     parent = elem->parent;
329 
330     /* unlink from parent */
331     if (parent) {
332         if (parent->child == elem) {
333             /* we unlink the first child */
334             /* update the parent's link */
335             parent->child = elem->next;
336         }
337         /* forget about the parent */
338         elem->parent = NULL;
339     }
340 
341     if (copy_ns < 2) {
342         lyxml_correct_elem_ns(ctx, elem, parent, copy_ns, 1);
343     }
344 
345     /* unlink from siblings */
346     if (elem->prev == elem) {
347         /* there are no more siblings */
348         return;
349     }
350     if (elem->next) {
351         elem->next->prev = elem->prev;
352     } else {
353         /* unlinking the last element */
354         if (parent) {
355             first = parent->child;
356         } else {
357             first = elem;
358             while (first->prev->next) {
359                 first = first->prev;
360             }
361         }
362         first->prev = elem->prev;
363     }
364     if (elem->prev->next) {
365         elem->prev->next = elem->next;
366     }
367 
368     /* clean up the unlinked element */
369     elem->next = NULL;
370     elem->prev = elem;
371 }
372 
373 API void
lyxml_unlink(struct ly_ctx * ctx,struct lyxml_elem * elem)374 lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem)
375 {
376     FUN_IN;
377 
378     if (!elem) {
379         return;
380     }
381 
382     lyxml_unlink_elem(ctx, elem, 1);
383 }
384 
385 void
lyxml_free_attr(struct ly_ctx * ctx,struct lyxml_elem * parent,struct lyxml_attr * attr)386 lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr)
387 {
388     struct lyxml_attr *aiter, *aprev;
389 
390     if (!attr) {
391         return;
392     }
393 
394     if (parent) {
395         /* unlink attribute from the parent's list of attributes */
396         aprev = NULL;
397         for (aiter = parent->attr; aiter; aiter = aiter->next) {
398             if (aiter == attr) {
399                 break;
400             }
401             aprev = aiter;
402         }
403         if (!aiter) {
404             /* attribute to remove not found */
405             return;
406         }
407 
408         if (!aprev) {
409             /* attribute is first in parent's list of attributes */
410             parent->attr = attr->next;
411         } else {
412             /* reconnect previous attribute to the next */
413             aprev->next = attr->next;
414         }
415     }
416     lydict_remove(ctx, attr->name);
417     lydict_remove(ctx, attr->value);
418     if (attr->type == LYXML_ATTR_STD_UNRES) {
419         free((char *)attr->ns);
420     }
421     free(attr);
422 }
423 
424 void
lyxml_free_attrs(struct ly_ctx * ctx,struct lyxml_elem * elem)425 lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem)
426 {
427     struct lyxml_attr *a, *next;
428     if (!elem || !elem->attr) {
429         return;
430     }
431 
432     a = elem->attr;
433     do {
434         next = a->next;
435 
436         lydict_remove(ctx, a->name);
437         lydict_remove(ctx, a->value);
438         if (a->type == LYXML_ATTR_STD_UNRES) {
439             free((char *)a->ns);
440         }
441         free(a);
442 
443         a = next;
444     } while (a);
445 }
446 
447 static void
lyxml_free_elem(struct ly_ctx * ctx,struct lyxml_elem * elem)448 lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem)
449 {
450     struct lyxml_elem *e, *next;
451 
452     if (!elem) {
453         return;
454     }
455 
456     lyxml_free_attrs(ctx, elem);
457     LY_TREE_FOR_SAFE(elem->child, next, e) {
458         lyxml_free_elem(ctx, e);
459     }
460     lydict_remove(ctx, elem->name);
461     lydict_remove(ctx, elem->content);
462     free(elem);
463 }
464 
465 API void
lyxml_free(struct ly_ctx * ctx,struct lyxml_elem * elem)466 lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem)
467 {
468     FUN_IN;
469 
470     if (!elem) {
471         return;
472     }
473 
474     lyxml_unlink_elem(ctx, elem, 2);
475     lyxml_free_elem(ctx, elem);
476 }
477 
478 API void
lyxml_free_withsiblings(struct ly_ctx * ctx,struct lyxml_elem * elem)479 lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem)
480 {
481     FUN_IN;
482 
483     struct lyxml_elem *iter, *aux;
484 
485     if (!elem) {
486         return;
487     }
488 
489     /* optimization - avoid freeing (unlinking) the last node of the siblings list */
490     /* so, first, free the node's predecessors to the beginning of the list ... */
491     for(iter = elem->prev; iter->next; iter = aux) {
492         aux = iter->prev;
493         lyxml_free(ctx, iter);
494     }
495     /* ... then, the node is the first in the siblings list, so free them all */
496     LY_TREE_FOR_SAFE(elem, aux, iter) {
497         lyxml_free(ctx, iter);
498     }
499 }
500 
501 API const char *
lyxml_get_attr(const struct lyxml_elem * elem,const char * name,const char * ns)502 lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns)
503 {
504     FUN_IN;
505 
506     struct lyxml_attr *a;
507 
508     assert(elem);
509     assert(name);
510 
511     for (a = elem->attr; a; a = a->next) {
512         if (a->type != LYXML_ATTR_STD) {
513             continue;
514         }
515 
516         if (!strcmp(name, a->name)) {
517             if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) {
518                 return a->value;
519             }
520         }
521     }
522 
523     return NULL;
524 }
525 
526 int
lyxml_add_child(struct ly_ctx * ctx,struct lyxml_elem * parent,struct lyxml_elem * elem)527 lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem)
528 {
529     struct lyxml_elem *e;
530 
531     assert(parent);
532     assert(elem);
533 
534     /* (re)link element to parent */
535     if (elem->parent) {
536         lyxml_unlink_elem(ctx, elem, 1);
537     }
538     elem->parent = parent;
539 
540     /* link parent to element */
541     if (parent->child) {
542         e = parent->child;
543         elem->prev = e->prev;
544         elem->next = NULL;
545         elem->prev->next = elem;
546         e->prev = elem;
547     } else {
548         parent->child = elem;
549         elem->prev = elem;
550         elem->next = NULL;
551     }
552 
553     return EXIT_SUCCESS;
554 }
555 
556 int
lyxml_getutf8(struct ly_ctx * ctx,const char * buf,unsigned int * read)557 lyxml_getutf8(struct ly_ctx *ctx, const char *buf, unsigned int *read)
558 {
559     int c, aux;
560     int i;
561 
562     c = buf[0];
563     *read = 0;
564 
565     /* buf is NULL terminated string, so 0 means EOF */
566     if (!c) {
567         LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
568         return 0;
569     }
570     *read = 1;
571 
572     /* process character byte(s) */
573     if ((c & 0xf8) == 0xf0) {
574         /* four bytes character */
575         *read = 4;
576 
577         c &= 0x07;
578         for (i = 1; i <= 3; i++) {
579             aux = buf[i];
580             if ((aux & 0xc0) != 0x80) {
581                 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
582                 return 0;
583             }
584 
585             c = (c << 6) | (aux & 0x3f);
586         }
587 
588         if (c < 0x1000 || c > 0x10ffff) {
589             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
590             return 0;
591         }
592     } else if ((c & 0xf0) == 0xe0) {
593         /* three bytes character */
594         *read = 3;
595 
596         c &= 0x0f;
597         for (i = 1; i <= 2; i++) {
598             aux = buf[i];
599             if ((aux & 0xc0) != 0x80) {
600                 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
601                 return 0;
602             }
603 
604             c = (c << 6) | (aux & 0x3f);
605         }
606 
607         if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) {
608             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
609             return 0;
610         }
611     } else if ((c & 0xe0) == 0xc0) {
612         /* two bytes character */
613         *read = 2;
614 
615         aux = buf[1];
616         if ((aux & 0xc0) != 0x80) {
617             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
618             return 0;
619         }
620         c = ((c & 0x1f) << 6) | (aux & 0x3f);
621 
622         if (c < 0x80) {
623             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
624             return 0;
625         }
626     } else if (!(c & 0x80)) {
627         /* one byte character */
628         if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) {
629             /* invalid character */
630             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
631             return 0;
632         }
633     } else {
634         /* invalid character */
635         LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character");
636         return 0;
637     }
638 
639     return c;
640 }
641 
642 /* logs directly */
643 static int
parse_ignore(struct ly_ctx * ctx,const char * data,const char * endstr,unsigned int * len)644 parse_ignore(struct ly_ctx *ctx, const char *data, const char *endstr, unsigned int *len)
645 {
646     unsigned int slen;
647     const char *c = data;
648 
649     slen = strlen(endstr);
650 
651     while (*c && strncmp(c, endstr, slen)) {
652         c++;
653     }
654     if (!*c) {
655         LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr);
656         return EXIT_FAILURE;
657     }
658     c += slen;
659 
660     *len = c - data;
661     return EXIT_SUCCESS;
662 }
663 
664 /* logs directly, fails when return == NULL and *len == 0 */
665 static char *
parse_text(struct ly_ctx * ctx,const char * data,char delim,unsigned int * len)666 parse_text(struct ly_ctx *ctx, const char *data, char delim, unsigned int *len)
667 {
668 #define BUFSIZE 1024
669 
670     char buf[BUFSIZE];
671     char *result = NULL;
672     unsigned int r;
673     int o, size = 0;
674     int cdsect = 0;
675     int32_t n;
676 
677     for (*len = o = 0; cdsect || data[*len] != delim; o++) {
678         if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) {
679             LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found");
680             goto error;
681         }
682 
683 loop:
684 
685         if (o > BUFSIZE - 4) {
686             /* add buffer into the result */
687             if (result) {
688                 size = size + o;
689                 result = ly_realloc(result, size + 1);
690             } else {
691                 size = o;
692                 result = malloc((size + 1) * sizeof *result);
693             }
694             LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
695             memcpy(&result[size - o], buf, o);
696 
697             /* write again into the beginning of the buffer */
698             o = 0;
699         }
700 
701         if (cdsect || !strncmp(&data[*len], "<![CDATA[", 9)) {
702             /* CDSect */
703             if (!cdsect) {
704                 cdsect = 1;
705                 *len += 9;
706             }
707             if (data[*len] && !strncmp(&data[*len], "]]>", 3)) {
708                 *len += 3;
709                 cdsect = 0;
710                 o--;            /* we don't write any data in this iteration */
711             } else {
712                 buf[o] = data[*len];
713                 (*len)++;
714             }
715         } else if (data[*len] == '&') {
716             (*len)++;
717             if (data[*len] != '#') {
718                 /* entity reference - only predefined refs are supported */
719                 if (!strncmp(&data[*len], "lt;", 3)) {
720                     buf[o] = '<';
721                     *len += 3;
722                 } else if (!strncmp(&data[*len], "gt;", 3)) {
723                     buf[o] = '>';
724                     *len += 3;
725                 } else if (!strncmp(&data[*len], "amp;", 4)) {
726                     buf[o] = '&';
727                     *len += 4;
728                 } else if (!strncmp(&data[*len], "apos;", 5)) {
729                     buf[o] = '\'';
730                     *len += 5;
731                 } else if (!strncmp(&data[*len], "quot;", 5)) {
732                     buf[o] = '\"';
733                     *len += 5;
734                 } else {
735                     LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)");
736                     goto error;
737                 }
738             } else {
739                 /* character reference */
740                 (*len)++;
741                 if (isdigit(data[*len])) {
742                     for (n = 0; isdigit(data[*len]); (*len)++) {
743                         n = (10 * n) + (data[*len] - '0');
744                     }
745                     if (data[*len] != ';') {
746                         LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon");
747                         goto error;
748                     }
749                 } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) {
750                     for (n = 0; isxdigit(data[*len]); (*len)++) {
751                         if (isdigit(data[*len])) {
752                             r = (data[*len] - '0');
753                         } else if (data[*len] > 'F') {
754                             r = 10 + (data[*len] - 'a');
755                         } else {
756                             r = 10 + (data[*len] - 'A');
757                         }
758                         n = (16 * n) + r;
759                     }
760                 } else {
761                     LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference");
762                     goto error;
763 
764                 }
765                 r = pututf8(ctx, &buf[o], n);
766                 if (!r) {
767                     LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value");
768                     goto error;
769                 }
770                 o += r - 1;     /* o is ++ in for loop */
771                 (*len)++;
772             }
773         } else {
774             r = copyutf8(ctx, &buf[o], &data[*len]);
775             if (!r) {
776                 goto error;
777             }
778 
779             o += r - 1;     /* o is ++ in for loop */
780             (*len) = (*len) + r;
781         }
782     }
783 
784     if (delim == '<' && !strncmp(&data[*len], "<![CDATA[", 9)) {
785         /* ignore loop's end condition on beginning of CDSect */
786         goto loop;
787     }
788 #undef BUFSIZE
789 
790     if (o) {
791         if (result) {
792             size = size + o;
793             result = ly_realloc(result, size + 1);
794         } else {
795             size = o;
796             result = malloc((size + 1) * sizeof *result);
797         }
798         LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL);
799         memcpy(&result[size - o], buf, o);
800     }
801     if (result) {
802         result[size] = '\0';
803     } else {
804         size = 0;
805         result = strdup("");
806         LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL)
807     }
808 
809     return result;
810 
811 error:
812     *len = 0;
813     free(result);
814     return NULL;
815 }
816 
/**
 * @brief Parse one attribute or namespace definition starting at @p data and
 * append it to the attribute list of @p parent. Logs directly.
 *
 * Input starting with "xmlns" is handled as a namespace definition (allocated
 * as struct lyxml_ns); anything else as a standard attribute. For a standard
 * attribute with a prefix, the prefix is resolved with lyxml_get_ns(); when it
 * cannot be resolved, the attribute becomes LYXML_ATTR_STD_UNRES and its ns
 * member temporarily holds the allocated prefix string.
 *
 * @param[in] ctx Context used for logging and the dictionary.
 * @param[in] data Input positioned at the first character of the attribute name.
 * @param[out] len Number of bytes consumed, including the closing quote;
 * written only on success.
 * @param[in,out] parent Element that receives the attribute.
 * @return The new attribute, NULL on error.
 */
static struct lyxml_attr *
parse_attr(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent)
{
    const char *c = data, *start, *delim;
    char *prefix = NULL, xml_flag, *str;
    int uc;
    struct lyxml_attr *attr = NULL, *a;
    unsigned int size;

    /* check if it is attribute or namespace */
    if (!strncmp(c, "xmlns", 5)) {
        /* namespace */
        attr = calloc(1, sizeof (struct lyxml_ns));
        LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);

        attr->type = LYXML_ATTR_NS;
        ((struct lyxml_ns *)attr)->parent = parent;
        c += 5;
        if (*c != ':') {
            /* default namespace, prefix will be empty */
            goto equal;
        }
        c++;                    /* go after ':' to the prefix value */
    } else {
        /* attribute */
        attr = calloc(1, sizeof *attr);
        LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL);

        attr->type = LYXML_ATTR_STD;
    }

    /* process name part of the attribute */
    start = c;
    uc = lyxml_getutf8(ctx, c, &size);
    if (!is_xmlnamestartchar(uc)) {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute");
        /* nothing but the structure itself has been allocated so far */
        free(attr);
        return NULL;
    }
    /* xml_flag tracks whether the name read so far spells "xml":
     * 1 = "x", 2 = "xm", 3 = "xml", 4 = anything else */
    xml_flag = 4;
    if (*c == 'x') {
        xml_flag = 1;
    }
    c += size;
    uc = lyxml_getutf8(ctx, c, &size);
    while (is_xmlnamechar(uc)) {
        /* the prefix handling applies only while the attribute is a (resolved) standard one */
        if (attr->type == LYXML_ATTR_STD) {
            if ((*c == ':') && (xml_flag != 3)) {
                /* attribute in a namespace (but disregard the special "xml" namespace) */
                start = c + 1;

                /* look for the prefix in namespaces */
                if (prefix) {
                    LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "prefix start, \":\" already parsed");
                    goto error;
                }
                prefix = malloc((c - data + 1) * sizeof *prefix);
                LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
                memcpy(prefix, data, c - data);
                prefix[c - data] = '\0';
                attr->ns = lyxml_get_ns(parent, prefix);
                if (!attr->ns) {
                    /* remember the prefix for later resolution */
                    attr->type = LYXML_ATTR_STD_UNRES;
                    attr->ns = (struct lyxml_ns *)prefix;
                    /* ownership of the string moved to attr, do not free it below */
                    prefix = NULL;
                }
            } else if (((*c == 'm') && (xml_flag == 1)) ||
                    ((*c == 'l') && (xml_flag == 2))) {
                ++xml_flag;
            } else {
                xml_flag = 4;
            }
        }
        c += size;
        uc = lyxml_getutf8(ctx, c, &size);
    }

    /* store the name */
    size = c - start;
    attr->name = lydict_insert(ctx, start, size);

equal:
    /* check Eq mark that can be surrounded by whitespaces */
    ign_xmlws(c);
    if (*c != '=') {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected");
        goto error;
    }
    c++;
    ign_xmlws(c);

    /* process value part of the attribute */
    if (!*c || (*c != '"' && *c != '\'')) {
        LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected");
        goto error;
    }
    /* the value ends at the same quote character that opened it */
    delim = c;
    str = parse_text(ctx, ++c, *delim, &size);
    if (!str && !size) {
        goto error;
    }
    attr->value = lydict_insert_zc(ctx, str);

    /* c points just behind the opening quote, size is the length of the value text */
    *len = c + size + 1 - data; /* +1 is delimiter size */

    /* put attribute into the parent's attributes list */
    if (parent->attr) {
        /* go to the end of the list */
        for (a = parent->attr; a->next; a = a->next);
        /* and append new attribute */
        a->next = attr;
    } else {
        /* add the first attribute in the list */
        parent->attr = attr;
    }

    free(prefix);
    return attr;

error:
    /* attr is not linked into parent yet, hence the NULL parent */
    lyxml_free_attr(ctx, NULL, attr);
    free(prefix);
    return NULL;
}
943 
944 /* logs directly */
945 struct lyxml_elem *
lyxml_parse_elem(struct ly_ctx * ctx,const char * data,unsigned int * len,struct lyxml_elem * parent,int options,int bt_count)946 lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent, int options,
947                  int bt_count)
948 {
949     const char *c = data, *start, *e;
950     const char *lws;    /* leading white space for handling mixed content */
951     int uc;
952     char *str;
953     char *prefix = NULL;
954     unsigned int prefix_len = 0;
955     struct lyxml_elem *elem = NULL, *child;
956     struct lyxml_attr *attr;
957     unsigned int size;
958     int nons_flag = 0, closed_flag = 0;
959 
960     *len = 0;
961 
962     if (bt_count > LY_RECURSION_LIMIT) {
963         LOGVAL(ctx, LYE_SPEC, LY_VLOG_NONE, NULL, "Recursion limit %d reached", LY_RECURSION_LIMIT);
964         return NULL;
965     }
966 
967     if (*c != '<') {
968         return NULL;
969     }
970 
971     /* locate element name */
972     c++;
973     e = c;
974 
975     uc = lyxml_getutf8(ctx, e, &size);
976     if (!is_xmlnamestartchar(uc)) {
977         LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element");
978         return NULL;
979     }
980     e += size;
981     uc = lyxml_getutf8(ctx, e, &size);
982     while (is_xmlnamechar(uc)) {
983         if (*e == ':') {
984             if (prefix_len) {
985                 LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found");
986                 goto error;
987             }
988             /* element in a namespace */
989             start = e + 1;
990 
991             /* look for the prefix in namespaces */
992             prefix_len = e - c;
993             LY_CHECK_ERR_GOTO(prefix, LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, e), error);
994             prefix = malloc((prefix_len + 1) * sizeof *prefix);
995             LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error);
996             memcpy(prefix, c, prefix_len);
997             prefix[prefix_len] = '\0';
998             c = start;
999         }
1000         e += size;
1001         uc = lyxml_getutf8(ctx, e, &size);
1002     }
1003     if (!*e) {
1004         LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
1005         free(prefix);
1006         return NULL;
1007     }
1008 
1009     /* allocate element structure */
1010     elem = calloc(1, sizeof *elem);
1011     LY_CHECK_ERR_RETURN(!elem, free(prefix); LOGMEM(ctx), NULL);
1012 
1013     elem->next = NULL;
1014     elem->prev = elem;
1015     if (parent) {
1016         lyxml_add_child(ctx, parent, elem);
1017     }
1018 
1019     /* store the name into the element structure */
1020     elem->name = lydict_insert(ctx, c, e - c);
1021     c = e;
1022 
1023 process:
1024     ign_xmlws(c);
1025     if (!strncmp("/>", c, 2)) {
1026         /* we are done, it was EmptyElemTag */
1027         c += 2;
1028         elem->content = lydict_insert(ctx, "", 0);
1029         closed_flag = 1;
1030     } else if (*c == '>') {
1031         /* process element content */
1032         c++;
1033         lws = NULL;
1034 
1035         while (*c) {
1036             if (!strncmp(c, "</", 2)) {
1037                 if (lws && !elem->child) {
1038                     /* leading white spaces were actually content */
1039                     goto store_content;
1040                 }
1041 
1042                 /* Etag */
1043                 c += 2;
1044                 /* get name and check it */
1045                 e = c;
1046                 uc = lyxml_getutf8(ctx, e, &size);
1047                 if (!is_xmlnamestartchar(uc)) {
1048                     LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element");
1049                     goto error;
1050                 }
1051                 e += size;
1052                 uc = lyxml_getutf8(ctx, e, &size);
1053                 while (is_xmlnamechar(uc)) {
1054                     if (*e == ':') {
1055                         /* element in a namespace */
1056                         start = e + 1;
1057 
1058                         /* look for the prefix in namespaces */
1059                         if (!prefix || memcmp(prefix, c, e - c)) {
1060                             LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
1061                                    "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name);
1062                             goto error;
1063                         }
1064                         c = start;
1065                     }
1066                     e += size;
1067                     uc = lyxml_getutf8(ctx, e, &size);
1068                 }
1069                 if (!*e) {
1070                     LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL);
1071                     goto error;
1072                 }
1073 
1074                 /* check that it corresponds to opening tag */
1075                 size = e - c;
1076                 str = malloc((size + 1) * sizeof *str);
1077                 LY_CHECK_ERR_GOTO(!str, LOGMEM(ctx), error);
1078                 memcpy(str, c, e - c);
1079                 str[e - c] = '\0';
1080                 if (size != strlen(elem->name) || memcmp(str, elem->name, size)) {
1081                     LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem,
1082                            "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str);
1083                     free(str);
1084                     goto error;
1085                 }
1086                 free(str);
1087                 c = e;
1088 
1089                 ign_xmlws(c);
1090                 if (*c != '>') {
1091                     LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name);
1092                     goto error;
1093                 }
1094                 c++;
1095                 if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) {
1096                     /* there was no content, but we don't want NULL (only if mixed content) */
1097                     elem->content = lydict_insert(ctx, "", 0);
1098                 }
1099                 closed_flag = 1;
1100                 break;
1101 
1102             } else if (!strncmp(c, "<?", 2)) {
1103                 if (lws) {
1104                     /* leading white spaces were only formatting */
1105                     lws = NULL;
1106                 }
1107                 /* PI - ignore it */
1108                 c += 2;
1109                 if (parse_ignore(ctx, c, "?>", &size)) {
1110                     goto error;
1111                 }
1112                 c += size;
1113             } else if (!strncmp(c, "<!--", 4)) {
1114                 if (lws) {
1115                     /* leading white spaces were only formatting */
1116                     lws = NULL;
1117                 }
1118                 /* Comment - ignore it */
1119                 c += 4;
1120                 if (parse_ignore(ctx, c, "-->", &size)) {
1121                     goto error;
1122                 }
1123                 c += size;
1124             } else if (!strncmp(c, "<![CDATA[", 9)) {
1125                 /* CDSect */
1126                 goto store_content;
1127             } else if (*c == '<') {
1128                 if (lws) {
1129                     if (elem->flags & LYXML_ELEM_MIXED) {
1130                         /* we have a mixed content */
1131                         goto store_content;
1132                     } else {
1133                         /* leading white spaces were only formatting */
1134                         lws = NULL;
1135                     }
1136                 }
1137                 if (elem->content) {
1138                     /* we have a mixed content */
1139                     if (options & LYXML_PARSE_NOMIXEDCONTENT) {
1140                         LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
1141                         goto error;
1142                     }
1143                     child = calloc(1, sizeof *child);
1144                     LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
1145                     child->content = elem->content;
1146                     elem->content = NULL;
1147                     lyxml_add_child(ctx, elem, child);
1148                     elem->flags |= LYXML_ELEM_MIXED;
1149                 }
1150                 child = lyxml_parse_elem(ctx, c, &size, elem, options, bt_count + 1);
1151                 if (!child) {
1152                     goto error;
1153                 }
1154                 c += size;      /* move after processed child element */
1155             } else if (is_xmlws(*c)) {
1156                 lws = c;
1157                 ign_xmlws(c);
1158             } else {
1159 store_content:
1160                 /* store text content */
1161                 if (lws) {
1162                     /* process content including the leading white spaces */
1163                     c = lws;
1164                     lws = NULL;
1165                 }
1166                 str = parse_text(ctx, c, '<', &size);
1167                 if (!str && !size) {
1168                     goto error;
1169                 }
1170                 elem->content = lydict_insert_zc(ctx, str);
1171                 c += size;      /* move after processed text content */
1172 
1173                 if (elem->child) {
1174                     /* we have a mixed content */
1175                     if (options & LYXML_PARSE_NOMIXEDCONTENT) {
1176                         LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content");
1177                         goto error;
1178                     }
1179                     child = calloc(1, sizeof *child);
1180                     LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error);
1181                     child->content = elem->content;
1182                     elem->content = NULL;
1183                     lyxml_add_child(ctx, elem, child);
1184                     elem->flags |= LYXML_ELEM_MIXED;
1185                 }
1186             }
1187         }
1188     } else {
1189         /* process attribute */
1190         attr = parse_attr(ctx, c, &size, elem);
1191         if (!attr) {
1192             goto error;
1193         }
1194         c += size;              /* move after processed attribute */
1195 
1196         /* check namespace */
1197         if (attr->type == LYXML_ATTR_NS) {
1198             if ((!prefix || !prefix[0]) && !attr->name) {
1199                 if (attr->value) {
1200                     /* default prefix */
1201                     elem->ns = (struct lyxml_ns *)attr;
1202                 } else {
1203                     /* xmlns="" -> no namespace */
1204                     nons_flag = 1;
1205                 }
1206             } else if (prefix && prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) {
1207                 /* matching namespace with prefix */
1208                 elem->ns = (struct lyxml_ns *)attr;
1209             }
1210         }
1211 
1212         /* go back to finish element processing */
1213         goto process;
1214     }
1215 
1216     *len = c - data;
1217 
1218     if (!closed_flag) {
1219         LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name);
1220         goto error;
1221     }
1222 
1223     /* resolve all attribute prefixes */
1224     LY_TREE_FOR(elem->attr, attr) {
1225         if (attr->type == LYXML_ATTR_STD_UNRES) {
1226             str = (char *)attr->ns;
1227             attr->ns = lyxml_get_ns(elem, str);
1228             free(str);
1229             attr->type = LYXML_ATTR_STD;
1230         }
1231     }
1232 
1233     if (!elem->ns && !nons_flag && parent) {
1234         elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL);
1235     }
1236     free(prefix);
1237     return elem;
1238 
1239 error:
1240     lyxml_free(ctx, elem);
1241     free(prefix);
1242     return NULL;
1243 }
1244 
1245 /* logs directly */
1246 API struct lyxml_elem *
lyxml_parse_mem(struct ly_ctx * ctx,const char * data,int options)1247 lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options)
1248 {
1249     FUN_IN;
1250 
1251     const char *c = data;
1252     unsigned int len;
1253     struct lyxml_elem *root, *first = NULL, *next;
1254 
1255     if (!ctx) {
1256         LOGARG;
1257         return NULL;
1258     }
1259 
1260     if (!data) {
1261         /* nothing to parse */
1262         return NULL;
1263     }
1264 
1265 repeat:
1266     /* process document */
1267     while (1) {
1268         if (!*c) {
1269             /* eof */
1270             return first;
1271         } else if (is_xmlws(*c)) {
1272             /* skip whitespaces */
1273             ign_xmlws(c);
1274         } else if (!strncmp(c, "<?", 2)) {
1275             /* XMLDecl or PI - ignore it */
1276             c += 2;
1277             if (parse_ignore(ctx, c, "?>", &len)) {
1278                 goto error;
1279             }
1280             c += len;
1281         } else if (!strncmp(c, "<!--", 4)) {
1282             /* Comment - ignore it */
1283             c += 2;
1284             if (parse_ignore(ctx, c, "-->", &len)) {
1285                 goto error;
1286             }
1287             c += len;
1288         } else if (!strncmp(c, "<!", 2)) {
1289             /* DOCTYPE */
1290             /* TODO - standalone ignore counting < and > */
1291             LOGERR(ctx, LY_EINVAL, "DOCTYPE not supported in XML documents.");
1292             goto error;
1293         } else if (*c == '<') {
1294             /* element - process it in next loop to strictly follow XML
1295              * format
1296              */
1297             break;
1298         } else {
1299             LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c);
1300             goto error;
1301         }
1302     }
1303 
1304     root = lyxml_parse_elem(ctx, c, &len, NULL, options, 0);
1305     if (!root) {
1306         goto error;
1307     } else if (!first) {
1308         first = root;
1309     } else {
1310         first->prev->next = root;
1311         root->prev = first->prev;
1312         first->prev = root;
1313     }
1314     c += len;
1315 
1316     /* ignore the rest of document where can be comments, PIs and whitespaces,
1317      * note that we are not detecting syntax errors in these parts
1318      */
1319     ign_xmlws(c);
1320     if (*c) {
1321         if (options & LYXML_PARSE_MULTIROOT) {
1322             goto repeat;
1323         } else {
1324             LOGWRN(ctx, "There are some not parsed data:\n%s", c);
1325         }
1326     }
1327 
1328     return first;
1329 
1330 error:
1331     LY_TREE_FOR_SAFE(first, next, root) {
1332         lyxml_free(ctx, root);
1333     }
1334     return NULL;
1335 }
1336 
1337 API struct lyxml_elem *
lyxml_parse_path(struct ly_ctx * ctx,const char * filename,int options)1338 lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options)
1339 {
1340     FUN_IN;
1341 
1342     struct lyxml_elem *elem = NULL;
1343     size_t length;
1344     int fd;
1345     char *addr;
1346 
1347     if (!filename || !ctx) {
1348         LOGARG;
1349         return NULL;
1350     }
1351 
1352     fd = open(filename, O_RDONLY);
1353     if (fd == -1) {
1354         LOGERR(ctx, LY_EINVAL,"Opening file \"%s\" failed.", filename);
1355         return NULL;
1356     }
1357     if (lyp_mmap(ctx, fd, 0, &length, (void **)&addr)) {
1358         LOGERR(ctx, LY_ESYS, "Mapping file descriptor into memory failed (%s()).", __func__);
1359         goto error;
1360     } else if (!addr) {
1361         /* empty XML file */
1362         goto error;
1363     }
1364 
1365     elem = lyxml_parse_mem(ctx, addr, options);
1366     lyp_munmap(addr, length);
1367     close(fd);
1368 
1369     return elem;
1370 
1371 error:
1372     if (fd != -1) {
1373         close(fd);
1374     }
1375 
1376     return NULL;
1377 }
1378 
1379 int
lyxml_dump_text(struct lyout * out,const char * text,LYXML_DATA_TYPE type)1380 lyxml_dump_text(struct lyout *out, const char *text, LYXML_DATA_TYPE type)
1381 {
1382     unsigned int i, n;
1383 
1384     if (!text) {
1385         return 0;
1386     }
1387 
1388     for (i = n = 0; text[i]; i++) {
1389         switch (text[i]) {
1390         case '&':
1391             n += ly_print(out, "&amp;");
1392             break;
1393         case '<':
1394             n += ly_print(out, "&lt;");
1395             break;
1396         case '>':
1397             /* not needed, just for readability */
1398             n += ly_print(out, "&gt;");
1399             break;
1400         case '"':
1401             if (type == LYXML_DATA_ATTR) {
1402                 n += ly_print(out, "&quot;");
1403                 break;
1404             }
1405             /* falls through */
1406         default:
1407             ly_write(out, &text[i], 1);
1408             n++;
1409         }
1410     }
1411 
1412     return n;
1413 }
1414 
1415 static int
dump_elem(struct lyout * out,const struct lyxml_elem * e,int level,int options,int last_elem)1416 dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem)
1417 {
1418     int size = 0;
1419     struct lyxml_attr *a;
1420     struct lyxml_elem *child;
1421     const char *delim, *delim_outer;
1422     int indent;
1423 
1424     if (!e->name) {
1425         /* mixed content */
1426         if (e->content) {
1427             return lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);
1428         } else {
1429             return 0;
1430         }
1431     }
1432 
1433     delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : "";
1434     indent = 2 * level;
1435     if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) {
1436         delim = "";
1437     }
1438     if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) {
1439         delim_outer = "";
1440         indent = 0;
1441     }
1442     if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) {
1443         delim_outer = "";
1444     }
1445 
1446     if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN))  {
1447         /* opening tag */
1448         if (e->ns && e->ns->prefix) {
1449             size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name);
1450         } else {
1451             size += ly_print(out, "%*s<%s", indent, "", e->name);
1452         }
1453     } else if (options & LYXML_PRINT_CLOSE) {
1454         indent = 0;
1455         goto close;
1456     }
1457 
1458     /* attributes */
1459     for (a = e->attr; a; a = a->next) {
1460         if (a->type == LYXML_ATTR_NS) {
1461             if (a->name) {
1462                 size += ly_print(out, " xmlns:%s=\"", a->name);
1463             } else {
1464                 size += ly_print(out, " xmlns=\"");
1465             }
1466         } else if (a->ns && a->ns->prefix) {
1467             size += ly_print(out, " %s:%s=\"", a->ns->prefix, a->name);
1468         } else {
1469             size += ly_print(out, " %s=\"", a->name);
1470         }
1471 
1472         if (a->value) {
1473             size += lyxml_dump_text(out, a->value, LYXML_DATA_ATTR);
1474         } else {
1475             size += ly_print(out, "&quot;&quot;");
1476         }
1477         size += ly_print(out, "\"");
1478     }
1479 
1480     /* apply options */
1481     if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) {
1482         size += ly_print(out, "/>%s", delim);
1483         return size;
1484     } else if (options & LYXML_PRINT_OPEN) {
1485         ly_print(out, ">");
1486         return ++size;
1487     } else if (options & LYXML_PRINT_ATTRS) {
1488         return size;
1489     }
1490 
1491     if (!e->child && (!e->content || !e->content[0])) {
1492         size += ly_print(out, "/>%s", delim);
1493         return size;
1494     } else if (e->content && e->content[0]) {
1495         ly_print(out, ">");
1496         size++;
1497 
1498         size += lyxml_dump_text(out, e->content, LYXML_DATA_ELEM);
1499 
1500         if (e->ns && e->ns->prefix) {
1501             size += ly_print(out, "</%s:%s>%s", e->ns->prefix, e->name, delim);
1502         } else {
1503             size += ly_print(out, "</%s>%s", e->name, delim);
1504         }
1505         return size;
1506     } else {
1507         size += ly_print(out, ">%s", delim);
1508     }
1509 
1510     /* go recursively */
1511     LY_TREE_FOR(e->child, child) {
1512         if (options & LYXML_PRINT_FORMAT) {
1513             size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0);
1514         } else {
1515             size += dump_elem(out, child, level, 0, 0);
1516         }
1517     }
1518 
1519 close:
1520     /* closing tag */
1521     if (e->ns && e->ns->prefix) {
1522         size += ly_print(out, "%*s</%s:%s>%s", indent, "", e->ns->prefix, e->name, delim_outer);
1523     } else {
1524         size += ly_print(out, "%*s</%s>%s", indent, "", e->name, delim_outer);
1525     }
1526 
1527     return size;
1528 }
1529 
1530 static int
dump_siblings(struct lyout * out,const struct lyxml_elem * e,int options)1531 dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options)
1532 {
1533     const struct lyxml_elem *start, *iter, *next;
1534     int ret = 0;
1535 
1536     if (e->parent) {
1537         start = e->parent->child;
1538     } else {
1539         start = e;
1540         while(start->prev && start->prev->next) {
1541             start = start->prev;
1542         }
1543     }
1544 
1545     LY_TREE_FOR_SAFE(start, next, iter) {
1546         ret += dump_elem(out, iter, 0, options, (next ? 0 : 1));
1547     }
1548 
1549     return ret;
1550 }
1551 
1552 API int
lyxml_print_file(FILE * stream,const struct lyxml_elem * elem,int options)1553 lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options)
1554 {
1555     FUN_IN;
1556 
1557     struct lyout out;
1558 
1559     if (!stream || !elem) {
1560         return 0;
1561     }
1562 
1563     memset(&out, 0, sizeof out);
1564 
1565     out.type = LYOUT_STREAM;
1566     out.method.f = stream;
1567 
1568     if (options & LYXML_PRINT_SIBLINGS) {
1569         return dump_siblings(&out, elem, options);
1570     } else {
1571         return dump_elem(&out, elem, 0, options, 1);
1572     }
1573 }
1574 
1575 API int
lyxml_print_fd(int fd,const struct lyxml_elem * elem,int options)1576 lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options)
1577 {
1578     FUN_IN;
1579 
1580     struct lyout out;
1581 
1582     if (fd < 0 || !elem) {
1583         return 0;
1584     }
1585 
1586     memset(&out, 0, sizeof out);
1587 
1588     out.type = LYOUT_FD;
1589     out.method.fd = fd;
1590 
1591     if (options & LYXML_PRINT_SIBLINGS) {
1592         return dump_siblings(&out, elem, options);
1593     } else {
1594         return dump_elem(&out, elem, 0, options, 1);
1595     }
1596 }
1597 
1598 API int
lyxml_print_mem(char ** strp,const struct lyxml_elem * elem,int options)1599 lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options)
1600 {
1601     FUN_IN;
1602 
1603     struct lyout out;
1604     int r;
1605 
1606     if (!strp || !elem) {
1607         return 0;
1608     }
1609 
1610     memset(&out, 0, sizeof out);
1611 
1612     out.type = LYOUT_MEMORY;
1613 
1614     if (options & LYXML_PRINT_SIBLINGS) {
1615         r = dump_siblings(&out, elem, options);
1616     } else {
1617         r = dump_elem(&out, elem, 0, options, 1);
1618     }
1619 
1620     *strp = out.method.mem.buf;
1621     return r;
1622 }
1623 
1624 API int
lyxml_print_clb(ssize_t (* writeclb)(void * arg,const void * buf,size_t count),void * arg,const struct lyxml_elem * elem,int options)1625 lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options)
1626 {
1627     FUN_IN;
1628 
1629     struct lyout out;
1630 
1631     if (!writeclb || !elem) {
1632         return 0;
1633     }
1634 
1635     memset(&out, 0, sizeof out);
1636 
1637     out.type = LYOUT_CALLBACK;
1638     out.method.clb.f = writeclb;
1639     out.method.clb.arg = arg;
1640 
1641     if (options & LYXML_PRINT_SIBLINGS) {
1642         return dump_siblings(&out, elem, options);
1643     } else {
1644         return dump_elem(&out, elem, 0, options, 1);
1645     }
1646 }
1647