/** * @file xml.c * @author Radek Krejci * @brief XML parser implementation for libyang * * Copyright (c) 2015 CESNET, z.s.p.o. * * This source code is licensed under BSD 3-Clause License (the "License"). * You may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://opensource.org/licenses/BSD-3-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #include "hash_table.h" #include "printer.h" #include "parser.h" #include "tree_schema.h" #include "xml_internal.h" #include "xpath.h" #define ign_xmlws(p) \ while (is_xmlws(*p)) { \ p++; \ } static struct lyxml_attr *lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr); API const struct lyxml_ns * lyxml_get_ns(const struct lyxml_elem *elem, const char *prefix) { FUN_IN; struct lyxml_attr *attr; if (!elem) { return NULL; } for (attr = elem->attr; attr; attr = attr->next) { if (attr->type != LYXML_ATTR_NS) { continue; } if (!attr->name) { if (!prefix) { /* default namespace found */ if (!attr->value) { /* empty default namespace -> no default namespace */ return NULL; } return (struct lyxml_ns *)attr; } } else if (prefix && !strcmp(attr->name, prefix)) { /* prefix found */ return (struct lyxml_ns *)attr; } } /* go recursively */ return lyxml_get_ns(elem->parent, prefix); } static void lyxml_correct_attr_ns(struct ly_ctx *ctx, struct lyxml_attr *attr, struct lyxml_elem *attr_parent, int copy_ns) { const struct lyxml_ns *tmp_ns; struct lyxml_elem *ns_root, *attr_root; if ((attr->type != LYXML_ATTR_NS) && attr->ns) { /* find the root of attr */ for (attr_root = attr_parent; attr_root->parent; attr_root = attr_root->parent); /* find the root of attr NS */ for (ns_root = attr->ns->parent; ns_root->parent; ns_root = ns_root->parent); /* attr NS is defined outside attr parent subtree */ if (ns_root != attr_root) { if (copy_ns) { tmp_ns = attr->ns; /* we may have already copied the NS over? */ attr->ns = lyxml_get_ns(attr_parent, tmp_ns->prefix); /* we haven't copied it over, copy it now */ if (!attr->ns) { attr->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, attr_parent, (struct lyxml_attr *)tmp_ns); } } else { attr->ns = NULL; } } } } static struct lyxml_attr * lyxml_dup_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr) { struct lyxml_attr *result, *a; if (!attr || !parent) { return NULL; } if (attr->type == LYXML_ATTR_NS) { /* this is correct, despite that all attributes seems like a standard * attributes (struct lyxml_attr), some of them can be namespace * definitions (and in that case they are struct lyxml_ns). */ result = (struct lyxml_attr *)calloc(1, sizeof (struct lyxml_ns)); } else { result = calloc(1, sizeof (struct lyxml_attr)); } LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL); result->value = lydict_insert(ctx, attr->value, 0); result->name = lydict_insert(ctx, attr->name, 0); result->type = attr->type; /* set namespace in case of standard attributes */ if (result->type == LYXML_ATTR_STD && attr->ns) { result->ns = attr->ns; lyxml_correct_attr_ns(ctx, result, parent, 1); } /* set parent pointer in case of namespace attribute */ if (result->type == LYXML_ATTR_NS) { ((struct lyxml_ns *)result)->parent = parent; } /* put attribute into the parent's attributes list */ if (parent->attr) { /* go to the end of the list */ for (a = parent->attr; a->next; a = a->next); /* and append new attribute */ a->next = result; } else { /* add the first attribute in the list */ parent->attr = result; } return result; } static void lyxml_correct_content_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *orig) { const char *end, *cur_expr; char *prefix; uint16_t i; size_t pref_len; const struct lyxml_ns *ns; struct lyxp_expr *exp; enum int_log_opts prev_ilo; /* it may not be a valid XPath expression */ ly_ilo_change(NULL, ILO_IGNORE, &prev_ilo, NULL); exp = lyxp_parse_expr(ctx, elem->content); ly_ilo_restore(NULL, prev_ilo, NULL, 0); if (!exp) { goto cleanup; } for (i = 0; i < exp->used; ++i) { cur_expr = &exp->expr[exp->expr_pos[i]]; if ((exp->tokens[i] == LYXP_TOKEN_NAMETEST) && (end = strnchr(cur_expr, ':', exp->tok_len[i]))) { /* get the prefix */ pref_len = end - cur_expr; prefix = strndup(cur_expr, pref_len); if (!prefix) { LOGMEM(ctx); goto cleanup; } ns = lyxml_get_ns(elem, prefix); /* we already have the namespace */ if (ns) { free(prefix); continue; } /* find the namespace in the original XML */ ns = lyxml_get_ns(orig, prefix); free(prefix); /* copy the namespace over, if any */ if (ns && !lyxml_dup_attr(ctx, elem, (struct lyxml_attr *)ns)) { LOGINT(ctx); goto cleanup; } } } cleanup: lyxp_expr_free(exp); } void lyxml_correct_elem_ns(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *orig, int copy_ns, int correct_attrs) { const struct lyxml_ns *tmp_ns; struct lyxml_elem *elem_root, *ns_root, *tmp, *iter; struct lyxml_attr *attr; /* find the root of elem */ for (elem_root = elem; elem_root->parent; elem_root = elem_root->parent); LY_TREE_DFS_BEGIN(elem, tmp, iter) { if (iter->ns) { /* find the root of elem NS */ for (ns_root = iter->ns->parent; ns_root; ns_root = ns_root->parent); /* elem NS is defined outside elem subtree */ if (ns_root != elem_root) { if (copy_ns) { tmp_ns = iter->ns; /* we may have already copied the NS over? */ iter->ns = lyxml_get_ns(iter, tmp_ns->prefix); /* we haven't copied it over, copy it now */ if (!iter->ns) { iter->ns = (struct lyxml_ns *)lyxml_dup_attr(ctx, iter, (struct lyxml_attr *)tmp_ns); } } else { iter->ns = NULL; } } } if (iter->content && iter->content[0] && copy_ns) { lyxml_correct_content_ns(ctx, iter, orig); } if (correct_attrs) { LY_TREE_FOR(iter->attr, attr) { lyxml_correct_attr_ns(ctx, attr, elem_root, copy_ns); } } LY_TREE_DFS_END(elem, tmp, iter); } } struct lyxml_elem * lyxml_dup_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, struct lyxml_elem *parent, int recursive, int with_siblings) { struct lyxml_elem *dup, *result = NULL; struct lyxml_attr *attr; if (!elem) { return NULL; } LY_TREE_FOR(elem, elem) { dup = calloc(1, sizeof *dup); LY_CHECK_ERR_RETURN(!dup, LOGMEM(ctx), NULL); dup->content = lydict_insert(ctx, elem->content, 0); dup->name = lydict_insert(ctx, elem->name, 0); dup->flags = elem->flags; dup->prev = dup; if (parent) { lyxml_add_child(ctx, parent, dup); } else if (result) { dup->prev = result->prev; dup->prev->next = dup; result->prev = dup; } /* keep old namespace for now */ dup->ns = elem->ns; /* duplicate attributes */ for (attr = elem->attr; attr; attr = attr->next) { lyxml_dup_attr(ctx, dup, attr); } /* correct namespaces */ lyxml_correct_elem_ns(ctx, dup, elem, 1, 0); if (recursive) { /* duplicate children */ lyxml_dup_elem(ctx, elem->child, dup, 1, 1); } /* set result (first sibling) */ if (!result) { result = dup; } if (!with_siblings) { break; } } return result; } API struct lyxml_elem * lyxml_dup(struct ly_ctx *ctx, struct lyxml_elem *root) { FUN_IN; return lyxml_dup_elem(ctx, root, NULL, 1, 0); } void lyxml_unlink_elem(struct ly_ctx *ctx, struct lyxml_elem *elem, int copy_ns) { struct lyxml_elem *parent, *first; if (!elem) { return; } /* store pointers to important nodes */ parent = elem->parent; /* unlink from parent */ if (parent) { if (parent->child == elem) { /* we unlink the first child */ /* update the parent's link */ parent->child = elem->next; } /* forget about the parent */ elem->parent = NULL; } if (copy_ns < 2) { lyxml_correct_elem_ns(ctx, elem, parent, copy_ns, 1); } /* unlink from siblings */ if (elem->prev == elem) { /* there are no more siblings */ return; } if (elem->next) { elem->next->prev = elem->prev; } else { /* unlinking the last element */ if (parent) { first = parent->child; } else { first = elem; while (first->prev->next) { first = first->prev; } } first->prev = elem->prev; } if (elem->prev->next) { elem->prev->next = elem->next; } /* clean up the unlinked element */ elem->next = NULL; elem->prev = elem; } API void lyxml_unlink(struct ly_ctx *ctx, struct lyxml_elem *elem) { FUN_IN; if (!elem) { return; } lyxml_unlink_elem(ctx, elem, 1); } void lyxml_free_attr(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_attr *attr) { struct lyxml_attr *aiter, *aprev; if (!attr) { return; } if (parent) { /* unlink attribute from the parent's list of attributes */ aprev = NULL; for (aiter = parent->attr; aiter; aiter = aiter->next) { if (aiter == attr) { break; } aprev = aiter; } if (!aiter) { /* attribute to remove not found */ return; } if (!aprev) { /* attribute is first in parent's list of attributes */ parent->attr = attr->next; } else { /* reconnect previous attribute to the next */ aprev->next = attr->next; } } lydict_remove(ctx, attr->name); lydict_remove(ctx, attr->value); if (attr->type == LYXML_ATTR_STD_UNRES) { free((char *)attr->ns); } free(attr); } void lyxml_free_attrs(struct ly_ctx *ctx, struct lyxml_elem *elem) { struct lyxml_attr *a, *next; if (!elem || !elem->attr) { return; } a = elem->attr; do { next = a->next; lydict_remove(ctx, a->name); lydict_remove(ctx, a->value); if (a->type == LYXML_ATTR_STD_UNRES) { free((char *)a->ns); } free(a); a = next; } while (a); } static void lyxml_free_elem(struct ly_ctx *ctx, struct lyxml_elem *elem) { struct lyxml_elem *e, *next; if (!elem) { return; } lyxml_free_attrs(ctx, elem); LY_TREE_FOR_SAFE(elem->child, next, e) { lyxml_free_elem(ctx, e); } lydict_remove(ctx, elem->name); lydict_remove(ctx, elem->content); free(elem); } API void lyxml_free(struct ly_ctx *ctx, struct lyxml_elem *elem) { FUN_IN; if (!elem) { return; } lyxml_unlink_elem(ctx, elem, 2); lyxml_free_elem(ctx, elem); } API void lyxml_free_withsiblings(struct ly_ctx *ctx, struct lyxml_elem *elem) { FUN_IN; struct lyxml_elem *iter, *aux; if (!elem) { return; } /* optimization - avoid freeing (unlinking) the last node of the siblings list */ /* so, first, free the node's predecessors to the beginning of the list ... */ for(iter = elem->prev; iter->next; iter = aux) { aux = iter->prev; lyxml_free(ctx, iter); } /* ... then, the node is the first in the siblings list, so free them all */ LY_TREE_FOR_SAFE(elem, aux, iter) { lyxml_free(ctx, iter); } } API const char * lyxml_get_attr(const struct lyxml_elem *elem, const char *name, const char *ns) { FUN_IN; struct lyxml_attr *a; assert(elem); assert(name); for (a = elem->attr; a; a = a->next) { if (a->type != LYXML_ATTR_STD) { continue; } if (!strcmp(name, a->name)) { if ((!ns && !a->ns) || (ns && a->ns && !strcmp(ns, a->ns->value))) { return a->value; } } } return NULL; } int lyxml_add_child(struct ly_ctx *ctx, struct lyxml_elem *parent, struct lyxml_elem *elem) { struct lyxml_elem *e; assert(parent); assert(elem); /* (re)link element to parent */ if (elem->parent) { lyxml_unlink_elem(ctx, elem, 1); } elem->parent = parent; /* link parent to element */ if (parent->child) { e = parent->child; elem->prev = e->prev; elem->next = NULL; elem->prev->next = elem; e->prev = elem; } else { parent->child = elem; elem->prev = elem; elem->next = NULL; } return EXIT_SUCCESS; } int lyxml_getutf8(struct ly_ctx *ctx, const char *buf, unsigned int *read) { int c, aux; int i; c = buf[0]; *read = 0; /* buf is NULL terminated string, so 0 means EOF */ if (!c) { LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL); return 0; } *read = 1; /* process character byte(s) */ if ((c & 0xf8) == 0xf0) { /* four bytes character */ *read = 4; c &= 0x07; for (i = 1; i <= 3; i++) { aux = buf[i]; if ((aux & 0xc0) != 0x80) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } c = (c << 6) | (aux & 0x3f); } if (c < 0x1000 || c > 0x10ffff) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } } else if ((c & 0xf0) == 0xe0) { /* three bytes character */ *read = 3; c &= 0x0f; for (i = 1; i <= 2; i++) { aux = buf[i]; if ((aux & 0xc0) != 0x80) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } c = (c << 6) | (aux & 0x3f); } if (c < 0x800 || (c > 0xd7ff && c < 0xe000) || c > 0xfffd) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } } else if ((c & 0xe0) == 0xc0) { /* two bytes character */ *read = 2; aux = buf[1]; if ((aux & 0xc0) != 0x80) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } c = ((c & 0x1f) << 6) | (aux & 0x3f); if (c < 0x80) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } } else if (!(c & 0x80)) { /* one byte character */ if (c < 0x20 && c != 0x9 && c != 0xa && c != 0xd) { /* invalid character */ LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } } else { /* invalid character */ LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "input character"); return 0; } return c; } /* logs directly */ static int parse_ignore(struct ly_ctx *ctx, const char *data, const char *endstr, unsigned int *len) { unsigned int slen; const char *c = data; slen = strlen(endstr); while (*c && strncmp(c, endstr, slen)) { c++; } if (!*c) { LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_NONE, NULL, "closing sequence", endstr); return EXIT_FAILURE; } c += slen; *len = c - data; return EXIT_SUCCESS; } /* logs directly, fails when return == NULL and *len == 0 */ static char * parse_text(struct ly_ctx *ctx, const char *data, char delim, unsigned int *len) { #define BUFSIZE 1024 char buf[BUFSIZE]; char *result = NULL; unsigned int r; int o, size = 0; int cdsect = 0; int32_t n; for (*len = o = 0; cdsect || data[*len] != delim; o++) { if (!data[*len] || (!cdsect && !strncmp(&data[*len], "]]>", 3))) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element content, \"]]>\" found"); goto error; } loop: if (o > BUFSIZE - 4) { /* add buffer into the result */ if (result) { size = size + o; result = ly_realloc(result, size + 1); } else { size = o; result = malloc((size + 1) * sizeof *result); } LY_CHECK_ERR_RETURN(!result, LOGMEM(ctx), NULL); memcpy(&result[size - o], buf, o); /* write again into the beginning of the buffer */ o = 0; } if (cdsect || !strncmp(&data[*len], "", 3)) { *len += 3; cdsect = 0; o--; /* we don't write any data in this iteration */ } else { buf[o] = data[*len]; (*len)++; } } else if (data[*len] == '&') { (*len)++; if (data[*len] != '#') { /* entity reference - only predefined refs are supported */ if (!strncmp(&data[*len], "lt;", 3)) { buf[o] = '<'; *len += 3; } else if (!strncmp(&data[*len], "gt;", 3)) { buf[o] = '>'; *len += 3; } else if (!strncmp(&data[*len], "amp;", 4)) { buf[o] = '&'; *len += 4; } else if (!strncmp(&data[*len], "apos;", 5)) { buf[o] = '\''; *len += 5; } else if (!strncmp(&data[*len], "quot;", 5)) { buf[o] = '\"'; *len += 5; } else { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "entity reference (only predefined references are supported)"); goto error; } } else { /* character reference */ (*len)++; if (isdigit(data[*len])) { for (n = 0; isdigit(data[*len]); (*len)++) { n = (10 * n) + (data[*len] - '0'); } if (data[*len] != ';') { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference, missing semicolon"); goto error; } } else if (data[(*len)++] == 'x' && isxdigit(data[*len])) { for (n = 0; isxdigit(data[*len]); (*len)++) { if (isdigit(data[*len])) { r = (data[*len] - '0'); } else if (data[*len] > 'F') { r = 10 + (data[*len] - 'a'); } else { r = 10 + (data[*len] - 'A'); } n = (16 * n) + r; } } else { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference"); goto error; } r = pututf8(ctx, &buf[o], n); if (!r) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "character reference value"); goto error; } o += r - 1; /* o is ++ in for loop */ (*len)++; } } else { r = copyutf8(ctx, &buf[o], &data[*len]); if (!r) { goto error; } o += r - 1; /* o is ++ in for loop */ (*len) = (*len) + r; } } if (delim == '<' && !strncmp(&data[*len], "type = LYXML_ATTR_NS; ((struct lyxml_ns *)attr)->parent = parent; c += 5; if (*c != ':') { /* default namespace, prefix will be empty */ goto equal; } c++; /* go after ':' to the prefix value */ } else { /* attribute */ attr = calloc(1, sizeof *attr); LY_CHECK_ERR_RETURN(!attr, LOGMEM(ctx), NULL); attr->type = LYXML_ATTR_STD; } /* process name part of the attribute */ start = c; uc = lyxml_getutf8(ctx, c, &size); if (!is_xmlnamestartchar(uc)) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the attribute"); free(attr); return NULL; } xml_flag = 4; if (*c == 'x') { xml_flag = 1; } c += size; uc = lyxml_getutf8(ctx, c, &size); while (is_xmlnamechar(uc)) { if (attr->type == LYXML_ATTR_STD) { if ((*c == ':') && (xml_flag != 3)) { /* attribute in a namespace (but disregard the special "xml" namespace) */ start = c + 1; /* look for the prefix in namespaces */ if (prefix) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "prefix start, \":\" already parsed"); goto error; } prefix = malloc((c - data + 1) * sizeof *prefix); LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error); memcpy(prefix, data, c - data); prefix[c - data] = '\0'; attr->ns = lyxml_get_ns(parent, prefix); if (!attr->ns) { /* remember the prefix for later resolution */ attr->type = LYXML_ATTR_STD_UNRES; attr->ns = (struct lyxml_ns *)prefix; prefix = NULL; } } else if (((*c == 'm') && (xml_flag == 1)) || ((*c == 'l') && (xml_flag == 2))) { ++xml_flag; } else { xml_flag = 4; } } c += size; uc = lyxml_getutf8(ctx, c, &size); } /* store the name */ size = c - start; attr->name = lydict_insert(ctx, start, size); equal: /* check Eq mark that can be surrounded by whitespaces */ ign_xmlws(c); if (*c != '=') { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute definition, \"=\" expected"); goto error; } c++; ign_xmlws(c); /* process value part of the attribute */ if (!*c || (*c != '"' && *c != '\'')) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "attribute value, \" or \' expected"); goto error; } delim = c; str = parse_text(ctx, ++c, *delim, &size); if (!str && !size) { goto error; } attr->value = lydict_insert_zc(ctx, str); *len = c + size + 1 - data; /* +1 is delimiter size */ /* put attribute into the parent's attributes list */ if (parent->attr) { /* go to the end of the list */ for (a = parent->attr; a->next; a = a->next); /* and append new attribute */ a->next = attr; } else { /* add the first attribute in the list */ parent->attr = attr; } free(prefix); return attr; error: lyxml_free_attr(ctx, NULL, attr); free(prefix); return NULL; } /* logs directly */ struct lyxml_elem * lyxml_parse_elem(struct ly_ctx *ctx, const char *data, unsigned int *len, struct lyxml_elem *parent, int options, int bt_count) { const char *c = data, *start, *e; const char *lws; /* leading white space for handling mixed content */ int uc; char *str; char *prefix = NULL; unsigned int prefix_len = 0; struct lyxml_elem *elem = NULL, *child; struct lyxml_attr *attr; unsigned int size; int nons_flag = 0, closed_flag = 0; *len = 0; if (bt_count > LY_RECURSION_LIMIT) { LOGVAL(ctx, LYE_SPEC, LY_VLOG_NONE, NULL, "Recursion limit %d reached", LY_RECURSION_LIMIT); return NULL; } if (*c != '<') { return NULL; } /* locate element name */ c++; e = c; uc = lyxml_getutf8(ctx, e, &size); if (!is_xmlnamestartchar(uc)) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "NameStartChar of the element"); return NULL; } e += size; uc = lyxml_getutf8(ctx, e, &size); while (is_xmlnamechar(uc)) { if (*e == ':') { if (prefix_len) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_NONE, NULL, "element name, multiple colons found"); goto error; } /* element in a namespace */ start = e + 1; /* look for the prefix in namespaces */ prefix_len = e - c; LY_CHECK_ERR_GOTO(prefix, LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, e), error); prefix = malloc((prefix_len + 1) * sizeof *prefix); LY_CHECK_ERR_GOTO(!prefix, LOGMEM(ctx), error); memcpy(prefix, c, prefix_len); prefix[prefix_len] = '\0'; c = start; } e += size; uc = lyxml_getutf8(ctx, e, &size); } if (!*e) { LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL); free(prefix); return NULL; } /* allocate element structure */ elem = calloc(1, sizeof *elem); LY_CHECK_ERR_RETURN(!elem, free(prefix); LOGMEM(ctx), NULL); elem->next = NULL; elem->prev = elem; if (parent) { lyxml_add_child(ctx, parent, elem); } /* store the name into the element structure */ elem->name = lydict_insert(ctx, c, e - c); c = e; process: ign_xmlws(c); if (!strncmp("/>", c, 2)) { /* we are done, it was EmptyElemTag */ c += 2; elem->content = lydict_insert(ctx, "", 0); closed_flag = 1; } else if (*c == '>') { /* process element content */ c++; lws = NULL; while (*c) { if (!strncmp(c, "child) { /* leading white spaces were actually content */ goto store_content; } /* Etag */ c += 2; /* get name and check it */ e = c; uc = lyxml_getutf8(ctx, e, &size); if (!is_xmlnamestartchar(uc)) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "NameStartChar of the element"); goto error; } e += size; uc = lyxml_getutf8(ctx, e, &size); while (is_xmlnamechar(uc)) { if (*e == ':') { /* element in a namespace */ start = e + 1; /* look for the prefix in namespaces */ if (!prefix || memcmp(prefix, c, e - c)) { LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Invalid (different namespaces) opening (%s) and closing element tags.", elem->name); goto error; } c = start; } e += size; uc = lyxml_getutf8(ctx, e, &size); } if (!*e) { LOGVAL(ctx, LYE_EOF, LY_VLOG_NONE, NULL); goto error; } /* check that it corresponds to opening tag */ size = e - c; str = malloc((size + 1) * sizeof *str); LY_CHECK_ERR_GOTO(!str, LOGMEM(ctx), error); memcpy(str, c, e - c); str[e - c] = '\0'; if (size != strlen(elem->name) || memcmp(str, elem->name, size)) { LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Invalid (mixed names) opening (%s) and closing (%s) element tags.", elem->name, str); free(str); goto error; } free(str); c = e; ign_xmlws(c); if (*c != '>') { LOGVAL(ctx, LYE_SPEC, LY_VLOG_XML, elem, "Data after closing element tag \"%s\".", elem->name); goto error; } c++; if (!(elem->flags & LYXML_ELEM_MIXED) && !elem->content) { /* there was no content, but we don't want NULL (only if mixed content) */ elem->content = lydict_insert(ctx, "", 0); } closed_flag = 1; break; } else if (!strncmp(c, "", &size)) { goto error; } c += size; } else if (!strncmp(c, "", &size)) { goto error; } c += size; } else if (!strncmp(c, "flags & LYXML_ELEM_MIXED) { /* we have a mixed content */ goto store_content; } else { /* leading white spaces were only formatting */ lws = NULL; } } if (elem->content) { /* we have a mixed content */ if (options & LYXML_PARSE_NOMIXEDCONTENT) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content"); goto error; } child = calloc(1, sizeof *child); LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error); child->content = elem->content; elem->content = NULL; lyxml_add_child(ctx, elem, child); elem->flags |= LYXML_ELEM_MIXED; } child = lyxml_parse_elem(ctx, c, &size, elem, options, bt_count + 1); if (!child) { goto error; } c += size; /* move after processed child element */ } else if (is_xmlws(*c)) { lws = c; ign_xmlws(c); } else { store_content: /* store text content */ if (lws) { /* process content including the leading white spaces */ c = lws; lws = NULL; } str = parse_text(ctx, c, '<', &size); if (!str && !size) { goto error; } elem->content = lydict_insert_zc(ctx, str); c += size; /* move after processed text content */ if (elem->child) { /* we have a mixed content */ if (options & LYXML_PARSE_NOMIXEDCONTENT) { LOGVAL(ctx, LYE_XML_INVAL, LY_VLOG_XML, elem, "XML element with mixed content"); goto error; } child = calloc(1, sizeof *child); LY_CHECK_ERR_GOTO(!child, LOGMEM(ctx), error); child->content = elem->content; elem->content = NULL; lyxml_add_child(ctx, elem, child); elem->flags |= LYXML_ELEM_MIXED; } } } } else { /* process attribute */ attr = parse_attr(ctx, c, &size, elem); if (!attr) { goto error; } c += size; /* move after processed attribute */ /* check namespace */ if (attr->type == LYXML_ATTR_NS) { if ((!prefix || !prefix[0]) && !attr->name) { if (attr->value) { /* default prefix */ elem->ns = (struct lyxml_ns *)attr; } else { /* xmlns="" -> no namespace */ nons_flag = 1; } } else if (prefix && prefix[0] && attr->name && !strncmp(attr->name, prefix, prefix_len + 1)) { /* matching namespace with prefix */ elem->ns = (struct lyxml_ns *)attr; } } /* go back to finish element processing */ goto process; } *len = c - data; if (!closed_flag) { LOGVAL(ctx, LYE_XML_MISS, LY_VLOG_XML, elem, "closing element tag", elem->name); goto error; } /* resolve all attribute prefixes */ LY_TREE_FOR(elem->attr, attr) { if (attr->type == LYXML_ATTR_STD_UNRES) { str = (char *)attr->ns; attr->ns = lyxml_get_ns(elem, str); free(str); attr->type = LYXML_ATTR_STD; } } if (!elem->ns && !nons_flag && parent) { elem->ns = lyxml_get_ns(parent, prefix_len ? prefix : NULL); } free(prefix); return elem; error: lyxml_free(ctx, elem); free(prefix); return NULL; } /* logs directly */ API struct lyxml_elem * lyxml_parse_mem(struct ly_ctx *ctx, const char *data, int options) { FUN_IN; const char *c = data; unsigned int len; struct lyxml_elem *root, *first = NULL, *next; if (!ctx) { LOGARG; return NULL; } if (!data) { /* nothing to parse */ return NULL; } repeat: /* process document */ while (1) { if (!*c) { /* eof */ return first; } else if (is_xmlws(*c)) { /* skip whitespaces */ ign_xmlws(c); } else if (!strncmp(c, "", &len)) { goto error; } c += len; } else if (!strncmp(c, "", &len)) { goto error; } c += len; } else if (!strncmp(c, " */ LOGERR(ctx, LY_EINVAL, "DOCTYPE not supported in XML documents."); goto error; } else if (*c == '<') { /* element - process it in next loop to strictly follow XML * format */ break; } else { LOGVAL(ctx, LYE_XML_INCHAR, LY_VLOG_NONE, NULL, c); goto error; } } root = lyxml_parse_elem(ctx, c, &len, NULL, options, 0); if (!root) { goto error; } else if (!first) { first = root; } else { first->prev->next = root; root->prev = first->prev; first->prev = root; } c += len; /* ignore the rest of document where can be comments, PIs and whitespaces, * note that we are not detecting syntax errors in these parts */ ign_xmlws(c); if (*c) { if (options & LYXML_PARSE_MULTIROOT) { goto repeat; } else { LOGWRN(ctx, "There are some not parsed data:\n%s", c); } } return first; error: LY_TREE_FOR_SAFE(first, next, root) { lyxml_free(ctx, root); } return NULL; } API struct lyxml_elem * lyxml_parse_path(struct ly_ctx *ctx, const char *filename, int options) { FUN_IN; struct lyxml_elem *elem = NULL; size_t length; int fd; char *addr; if (!filename || !ctx) { LOGARG; return NULL; } fd = open(filename, O_RDONLY); if (fd == -1) { LOGERR(ctx, LY_EINVAL,"Opening file \"%s\" failed.", filename); return NULL; } if (lyp_mmap(ctx, fd, 0, &length, (void **)&addr)) { LOGERR(ctx, LY_ESYS, "Mapping file descriptor into memory failed (%s()).", __func__); goto error; } else if (!addr) { /* empty XML file */ goto error; } elem = lyxml_parse_mem(ctx, addr, options); lyp_munmap(addr, length); close(fd); return elem; error: if (fd != -1) { close(fd); } return NULL; } int lyxml_dump_text(struct lyout *out, const char *text, LYXML_DATA_TYPE type) { unsigned int i, n; if (!text) { return 0; } for (i = n = 0; text[i]; i++) { switch (text[i]) { case '&': n += ly_print(out, "&"); break; case '<': n += ly_print(out, "<"); break; case '>': /* not needed, just for readability */ n += ly_print(out, ">"); break; case '"': if (type == LYXML_DATA_ATTR) { n += ly_print(out, """); break; } /* falls through */ default: ly_write(out, &text[i], 1); n++; } } return n; } static int dump_elem(struct lyout *out, const struct lyxml_elem *e, int level, int options, int last_elem) { int size = 0; struct lyxml_attr *a; struct lyxml_elem *child; const char *delim, *delim_outer; int indent; if (!e->name) { /* mixed content */ if (e->content) { return lyxml_dump_text(out, e->content, LYXML_DATA_ELEM); } else { return 0; } } delim = delim_outer = (options & LYXML_PRINT_FORMAT) ? "\n" : ""; indent = 2 * level; if ((e->flags & LYXML_ELEM_MIXED) || (e->parent && (e->parent->flags & LYXML_ELEM_MIXED))) { delim = ""; } if (e->parent && (e->parent->flags & LYXML_ELEM_MIXED)) { delim_outer = ""; indent = 0; } if (last_elem && (options & LYXML_PRINT_NO_LAST_NEWLINE)) { delim_outer = ""; } if (!(options & (LYXML_PRINT_OPEN | LYXML_PRINT_CLOSE | LYXML_PRINT_ATTRS)) || (options & LYXML_PRINT_OPEN)) { /* opening tag */ if (e->ns && e->ns->prefix) { size += ly_print(out, "%*s<%s:%s", indent, "", e->ns->prefix, e->name); } else { size += ly_print(out, "%*s<%s", indent, "", e->name); } } else if (options & LYXML_PRINT_CLOSE) { indent = 0; goto close; } /* attributes */ for (a = e->attr; a; a = a->next) { if (a->type == LYXML_ATTR_NS) { if (a->name) { size += ly_print(out, " xmlns:%s=\"", a->name); } else { size += ly_print(out, " xmlns=\""); } } else if (a->ns && a->ns->prefix) { size += ly_print(out, " %s:%s=\"", a->ns->prefix, a->name); } else { size += ly_print(out, " %s=\"", a->name); } if (a->value) { size += lyxml_dump_text(out, a->value, LYXML_DATA_ATTR); } else { size += ly_print(out, """"); } size += ly_print(out, "\""); } /* apply options */ if ((options & LYXML_PRINT_CLOSE) && (options & LYXML_PRINT_OPEN)) { size += ly_print(out, "/>%s", delim); return size; } else if (options & LYXML_PRINT_OPEN) { ly_print(out, ">"); return ++size; } else if (options & LYXML_PRINT_ATTRS) { return size; } if (!e->child && (!e->content || !e->content[0])) { size += ly_print(out, "/>%s", delim); return size; } else if (e->content && e->content[0]) { ly_print(out, ">"); size++; size += lyxml_dump_text(out, e->content, LYXML_DATA_ELEM); if (e->ns && e->ns->prefix) { size += ly_print(out, "%s", e->ns->prefix, e->name, delim); } else { size += ly_print(out, "%s", e->name, delim); } return size; } else { size += ly_print(out, ">%s", delim); } /* go recursively */ LY_TREE_FOR(e->child, child) { if (options & LYXML_PRINT_FORMAT) { size += dump_elem(out, child, level + 1, LYXML_PRINT_FORMAT, 0); } else { size += dump_elem(out, child, level, 0, 0); } } close: /* closing tag */ if (e->ns && e->ns->prefix) { size += ly_print(out, "%*s%s", indent, "", e->ns->prefix, e->name, delim_outer); } else { size += ly_print(out, "%*s%s", indent, "", e->name, delim_outer); } return size; } static int dump_siblings(struct lyout *out, const struct lyxml_elem *e, int options) { const struct lyxml_elem *start, *iter, *next; int ret = 0; if (e->parent) { start = e->parent->child; } else { start = e; while(start->prev && start->prev->next) { start = start->prev; } } LY_TREE_FOR_SAFE(start, next, iter) { ret += dump_elem(out, iter, 0, options, (next ? 0 : 1)); } return ret; } API int lyxml_print_file(FILE *stream, const struct lyxml_elem *elem, int options) { FUN_IN; struct lyout out; if (!stream || !elem) { return 0; } memset(&out, 0, sizeof out); out.type = LYOUT_STREAM; out.method.f = stream; if (options & LYXML_PRINT_SIBLINGS) { return dump_siblings(&out, elem, options); } else { return dump_elem(&out, elem, 0, options, 1); } } API int lyxml_print_fd(int fd, const struct lyxml_elem *elem, int options) { FUN_IN; struct lyout out; if (fd < 0 || !elem) { return 0; } memset(&out, 0, sizeof out); out.type = LYOUT_FD; out.method.fd = fd; if (options & LYXML_PRINT_SIBLINGS) { return dump_siblings(&out, elem, options); } else { return dump_elem(&out, elem, 0, options, 1); } } API int lyxml_print_mem(char **strp, const struct lyxml_elem *elem, int options) { FUN_IN; struct lyout out; int r; if (!strp || !elem) { return 0; } memset(&out, 0, sizeof out); out.type = LYOUT_MEMORY; if (options & LYXML_PRINT_SIBLINGS) { r = dump_siblings(&out, elem, options); } else { r = dump_elem(&out, elem, 0, options, 1); } *strp = out.method.mem.buf; return r; } API int lyxml_print_clb(ssize_t (*writeclb)(void *arg, const void *buf, size_t count), void *arg, const struct lyxml_elem *elem, int options) { FUN_IN; struct lyout out; if (!writeclb || !elem) { return 0; } memset(&out, 0, sizeof out); out.type = LYOUT_CALLBACK; out.method.clb.f = writeclb; out.method.clb.arg = arg; if (options & LYXML_PRINT_SIBLINGS) { return dump_siblings(&out, elem, options); } else { return dump_elem(&out, elem, 0, options, 1); } }