1 /* zxlibdec.c - Utility functions for generated decoders
2 * Copyright (c) 2013 Synergetics SA (sampo@synergetics.be), All Rights Reserved.
3 * Copyright (c) 2010 Sampo Kellomaki (sampo@iki.fi), All Rights Reserved.
4 * Copyright (c) 2006-2009 Symlabs (symlabs@symlabs.com), All Rights Reserved.
5 * Author: Sampo Kellomaki (sampo@iki.fi)
6 * This is confidential unpublished proprietary source code of the author.
7 * NO WARRANTY, not even implied warranties. Contains trade secrets.
8 * Distribution prohibited unless authorized in writing.
9 * Licensed under Apache License 2.0, see file COPYING.
10 * $Id: zxlib.c,v 1.41 2009-11-24 23:53:40 sampo Exp $
11 *
12 * 28.5.2006, created --Sampo
13 * 8.8.2006, moved lookup functions to generated code --Sampo
14 * 12.8.2006, added special scanning of xmlns to avoid backtracking elem recognition --Sampo
15 * 26.8.2006, significant Common Subexpression Elimination (CSE) --Sampo
16 * 30.9.2007, more CSE --Sampo
17 * 7.10.2008, added documentation --Sampo
18 * 26.5.2010, added XML parse error reporting --Sampo
19 * 27.10.2010, forked from zxlib.c, re-engineered namespace handling --Sampo
20 * 20.11.2010, reengineered for unified simplified decoder --Sampo
21 * 30.11.2013, fixed boundary condition of loop looking one past end (found by valgrind) --Sampo
22 */
23
24 #include "platform.h" /* needed on Win32 for snprintf(), va_copy() et al. */
25
26 //#include <pthread.h>
27 #include <memory.h>
28 #include <string.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32
33 #include "errmac.h"
34 #include "zx.h"
35 #include "c/zx-data.h"
36 #include "c/zx-ns.h"
37
38 /*() Format error message describing an XML parse error. The buf argument
39 * should be at leaset 256 bytes for satisfactory results. */
40
41 /* Called by: covimp_test, zxid_wsp_validate */
zx_format_parse_error(struct zx_ctx * ctx,char * buf,int siz,char * logkey)42 int zx_format_parse_error(struct zx_ctx* ctx, char* buf, int siz, char* logkey)
43 {
44 int at, end, start, len;
45 end = ctx->lim - ctx->bas;
46 at = MIN(ctx->p - ctx->bas, end);
47 start = MAX(0,at-30);
48 len = MIN(at+30, end) - start;
49 len = snprintf(buf, siz, "%s: Parse error at char %d/%d (prev char, char, next char: 0x%02x 0x%02x 0x%02x)\n%.*s\n%.*s^\n", logkey, at, end, at > 0 ? ctx->p[-1]:0, ctx->p[0], at < end ? ctx->p[1]:0, len, ctx->bas + start, at-start, "-----------------------------------------------");
50 buf[siz-1] = 0; /* must terminate manually as on win32 nul is not guaranteed */
51 return len;
52 }
53
54 /* Called by: covimp_test, zx_dec_attr_val x2, zx_scan_elem_end, zx_scan_pi_or_comment, zx_scan_xmlns x2 */
zx_xml_parse_err(struct zx_ctx * c,char quote,const char * func,const char * msg)55 void zx_xml_parse_err(struct zx_ctx* c, char quote, const char* func, const char* msg)
56 {
57 const char* errloc = MAX(c->p - 20, c->bas);
58 ERR("%s: %s: char(%c) pos=%d (%.*s)", func, msg, quote,
59 ((int)(c->p - c->bas)), (int)MIN(c->lim - errloc, 40), errloc);
60 }
61
62 /* Called by: zx_xmlns_decl */
zx_xml_parse_dbg(struct zx_ctx * c,char quote,const char * func,const char * msg)63 void zx_xml_parse_dbg(struct zx_ctx* c, char quote, const char* func, const char* msg)
64 {
65 const char* errloc = MAX(c->p - 20, c->bas);
66 D("%s: %s: char(%c) pos=%d (%.*s)", func, msg, quote,
67 ((int)(c->p - c->bas)), (int)MIN(c->lim - errloc, 40), errloc);
68 }
69
70 /* --------------------- D e c o d e r ---------------------- */
71
72 /* Called by: zx_DEC_elem */
zx_scan_data(struct zx_ctx * c,struct zx_elem_s * el)73 static int zx_scan_data(struct zx_ctx* c, struct zx_elem_s* el)
74 {
75 struct zx_str* ss;
76 const char* d = c->p;
77 if (*c->p)
78 ZX_LOOK_FOR(c,'<');
79 ss = ZX_ZALLOC(c, struct zx_str);
80 ss->len = c->p - d;
81 ss->s = (char*)d;
82 ss->tok = ZX_TOK_DATA;
83 ss->n = &el->kids->g;
84 el->kids = (struct zx_elem_s*)ss;
85 return 1;
86
87 look_for_not_found:
88 /*zx_xml_parse_err(c, '<', (const char*)__FUNCTION__, "look for not found"); -- Causes bogus warning in end of buffer. */
89 return 0;
90 }
91
92 /* Called by: zx_DEC_elem */
zx_scan_pi_or_comment(struct zx_ctx * c)93 static int zx_scan_pi_or_comment(struct zx_ctx* c)
94 {
95 const char* name;
96 char quote;
97
98 switch (*c->p) {
99 case '?': /* processing instruction <?xml ... ?> */
100 name = c->p-1;
101 DD("Processing Instruction detected (%.*s)", 5, name);
102 while (1) {
103 quote = '>';
104 ZX_LOOK_FOR(c,'>');
105 if (c->p[-1] == '?')
106 break;
107 }
108 ++c->p;
109 DD("Processing Instruction scanned (%.*s)", c->p-name, name);
110 /*ZX_PI_DEC_EXT(pi);*/
111 return 0;
112 case '!': /* comment <!-- ... --> or <!DOCTYPE...> */
113 name = c->p-1;
114 if (!memcmp(c->p+1, "DOCTYPE", sizeof("DOCTYPE")-1)) {
115 D("DOCTYPE detected (%.*s)", 60, c->p-1);
116 ZX_LOOK_FOR(c,'>');
117 ++c->p;
118 D("DOCTYPE scanned (%.*s)", ((int)(c->p-name)), name);
119 return 0;
120 }
121 c->p += 2;
122 if (c->p[-1] != '-' || c->p[0] != '-') {
123 c->p -= 3;
124 return 1;
125 }
126 D("Comment detected (%.*s)", 8, name);
127 c->p += 2;
128 while (1) {
129 quote = '>';
130 ZX_LOOK_FOR(c,'>');
131 if (c->p[-2] == '-' && c->p[-1] == '-') {
132 break;
133 }
134 }
135 ++c->p;
136 D("Comment scanned (%.*s)", ((int)(c->p-name)), name);
137 /*ZX_COMMENT_DEC_EXT(comment);*/
138 return 0;
139 }
140 return 1;
141 look_for_not_found:
142 zx_xml_parse_err(c, quote, (const char*)__FUNCTION__, "look for not found");
143 return 1;
144 }
145
146 /*() Assuming current c->p points to a name, scan until end of the name.
147 * Called from innards for dec-templ.c for CSE. Leaves c->p pointing to char after name. */
148
149 /* Called by: zx_el_lookup */
zx_scan_elem_start(struct zx_ctx * c,const char * func)150 static const char* zx_scan_elem_start(struct zx_ctx* c, const char* func)
151 {
152 const char* name = c->p;
153 int len = strcspn(c->p, " >/\n\r\t");
154 c->p += len;
155 /*for (++c->p; c->p<c->lim && !ONE_OF_6(*c->p, ' ', '>', '/', '\n', '\r', '\t'); ++c->p) ;*/
156 if (c->p < c->lim)
157 return name;
158 ERR("%s: Incomplete %.*s", func, ((int)(c->lim-name)), name);
159 return 0;
160 }
161
162 /*() End of tag detection called from innards for dec-templ.c for CSE. */
163
164 /* Called by: zx_DEC_elem */
zx_scan_elem_end(struct zx_ctx * c,const char * start,const char * func)165 static int zx_scan_elem_end(struct zx_ctx* c, const char* start, const char* func)
166 {
167 const char* name;
168 const char* errloc;
169 ++c->p;
170 if (c->p >= c->lim)
171 goto look_for_not_found;
172 name = c->p;
173 ZX_LOOK_FOR(c,'>');
174 if (memcmp(start?start:"", name, c->p-name)) {
175 errloc = MAX(c->p - 20, c->bas);
176 ERR("%s: Mismatching close tag(%.*s) pos=%d (%.*s)", func, ((int)(c->p-name)), name, ((int)(c->p - c->bas)), (int)MIN(c->lim - errloc, 40), errloc);
177 ++c->p;
178 return 0;
179 }
180 return 1;
181
182 look_for_not_found:
183 zx_xml_parse_err(c, '>', func, "char not found");
184 return 0;
185 }
186
187 /*() Check the child element ordering of a token against schema.
188 * Returns 0 if ordering is good. If ordering is bad, returns index to
189 * the offending child element. This check does not verify whether all
190 * mandatory child elements are present - it merely checks that the
191 * order is right.
192 *
193 * N.B. Check c/zx-elems.c for the tables that show the ordering (tables
194 * were generated from schema so hand editing them is not recommended). */
195
196 /* Called by: zx_reverse_elem_lists */
zx_chk_el_ord(struct zx_elem_s * x)197 static int zx_chk_el_ord(struct zx_elem_s* x)
198 {
199 int i,j,n;
200 struct zx_el_tok* et;
201 struct zx_el_tok* ef;
202 struct zx_el_desc* ed = zx_el_desc_lookup(x->g.tok);
203 if (!ed)
204 return 0;
205 x = x->kids;
206 for (n = i = j = 0; x; i = j, x = (struct zx_elem_s*)x->g.n) {
207 ++n;
208 if (x->g.tok == ZX_TOK_DATA)
209 continue;
210 for (j = i; ed->el_order[j] != ZX_TOK_NOT_FOUND; ++j)
211 if (x->g.tok == ed->el_order[j]) {
212 if ((errmac_debug & ERRMAC_DEBUG_MASK)>2) {
213 et = zx_el_tab + (x->g.tok & ZX_TOK_TOK_MASK);
214 ef = zx_el_tab + MINMAX(ed->tok & ZX_TOK_TOK_MASK, 0, zx__ELEM_MAX);
215 D("Right: Known <%s> tok(0x%06x) as %d. child of <%s> tok(0x%06x) (%d,%d)", et->name, x->g.tok, n, ef->name, ed->tok, i, j);
216 }
217 break;
218 }
219 if (ed->el_order[j] == ZX_TOK_NOT_FOUND) {
220 if (x->g.tok == ZX_TOK_NOT_FOUND || !IN_RANGE(x->g.tok & ZX_TOK_TOK_MASK, 0, zx__ELEM_MAX)) {
221 ef = zx_el_tab + MINMAX(ed->tok & ZX_TOK_TOK_MASK, 0, zx__ELEM_MAX);
222 /* *** ideally this should be flagged as error, but problem is that we
223 * permit freeform bodies so there are a lot of unknown tokens like this. */
224 D("Unknown <%.*s> token(0x%06x) as %d. child of <%s> 0x%06x (%d,%d)", x->g.len, x->g.s, x->g.tok, n, ef->name, ed->tok, i, j);
225 } else {
226 et = zx_el_tab + (x->g.tok & ZX_TOK_TOK_MASK);
227 ef = zx_el_tab + MINMAX(ed->tok & ZX_TOK_TOK_MASK, 0, zx__ELEM_MAX);
228 ERR("WRONG: Known <%s> tok(0x%06x) in wrong place as %d. child of <%s> tok(0x%06x) (%d,%d)", et->name, x->g.tok, n, ef->name, ed->tok, i, j);
229 // *** we should really dump the whole message into log
230 }
231 return n;
232 }
233 }
234 return 0;
235 }
236
237 /*() Insert an attribute to element's attribute list in canoncically sorted
238 * place, i.e. no namespace sorts first, namespaced attribute sort by
239 * namespace URI (not namespace prefix). Assumes the attribute
240 * list has so far been sorted. Used as part of insertion sort. */
241
242 /* Called by: zx_reverse_elem_lists */
zx_ord_ins_at(struct zx_elem_s * x,struct zx_attr_s * in_at)243 struct zx_attr_s* zx_ord_ins_at(struct zx_elem_s* x, struct zx_attr_s* in_at)
244 {
245 struct zx_attr_s* at;
246 struct zx_attr_s** atp;
247 const char* at_name;
248 const char* in_at_name;
249 const char* p;
250 int res, at_name_len, in_at_name_len;
251
252 atp = &x->attr;
253 for (at = x->attr; at; atp = (struct zx_attr_s**)&at->g.n, at = (struct zx_attr_s*)at->g.n) {
254 if (!in_at->ns && IN_RANGE((in_at->g.tok & ZX_TOK_NS_MASK)>>ZX_TOK_NS_SHIFT, 1, zx__NS_MAX))
255 in_at->ns = zx_ns_tab + ((in_at->g.tok & ZX_TOK_NS_MASK)>>ZX_TOK_NS_SHIFT);
256 if (in_at->ns) {
257 if (!at->ns && IN_RANGE((at->g.tok & ZX_TOK_NS_MASK)>>ZX_TOK_NS_SHIFT, 1, zx__NS_MAX))
258 at->ns = zx_ns_tab + ((at->g.tok & ZX_TOK_NS_MASK)>>ZX_TOK_NS_SHIFT);
259 if (at->ns) {
260 if (at->ns != in_at->ns) {
261 res = memcmp(at->ns->url, in_at->ns->url, MIN(at->ns->url_len, in_at->ns->url_len));
262 if (res > 0)
263 break;
264 if (res < 0)
265 continue;
266 if (at->ns->url_len > in_at->ns->url_len)
267 break;
268 if (at->ns->url_len < in_at->ns->url_len)
269 continue;
270 }
271 } else
272 continue; /* at has no namespace, sorts earlier than in_at that has namespace. */
273 } else {
274 if (at->ns)
275 break; /* No namespace sorts before namespace */
276 }
277 /* Neither has namespace, or namespaces were equal: sort by attribute name */
278
279 if (at->name) {
280 at_name = at->name;
281 at_name_len = at->name_len;
282 p = memchr(at_name, ':', at_name_len);
283 if (p) {
284 at_name_len -= 1 + p - at_name;
285 at_name = p+1;
286 }
287 } else {
288 if (IN_RANGE((at->g.tok & ZX_TOK_TOK_MASK), 0, zx__ATTR_MAX)) {
289 at_name = (char*)zx_at_tab[at->g.tok & ZX_TOK_TOK_MASK].name;
290 at_name_len = strlen(at_name);
291 } else {
292 ERR("Attribute supplied without name and tok 0x%06x is out of range", at->g.tok);
293 break;
294 }
295 }
296 if (in_at->name) {
297 in_at_name = in_at->name;
298 in_at_name_len = in_at->name_len;
299 p = memchr(in_at_name, ':', in_at_name_len);
300 if (p) {
301 in_at_name_len -= 1 + p - in_at_name;
302 in_at_name = p+1;
303 }
304 } else {
305 if (IN_RANGE((in_at->g.tok & ZX_TOK_TOK_MASK), 0, zx__ATTR_MAX)) {
306 in_at_name = (char*)zx_at_tab[in_at->g.tok & ZX_TOK_TOK_MASK].name;
307 in_at_name_len = strlen(in_at_name);
308 } else {
309 ERR("Attribute supplied without name and tok 0x%06x is out of range", in_at->g.tok);
310 break;
311 }
312 }
313 res = memcmp(at_name, in_at_name, MIN(at_name_len, in_at_name_len));
314 if (res > 0 || !res && at_name_len >= in_at_name_len)
315 break;
316 }
317 in_at->g.n = &at->g;
318 *atp = in_at;
319 return in_at;
320 }
321
322 /*() Since kids lists of elements are usually built by adding
323 * nodes at the root of the list, they end up being in reverse order.
324 * Thus we need to reverse them to get them to right order. We
325 * take this opportunity to also check that the ordering is correct
326 * and also to sort the XML attributes.
327 * Called from dec-templ.c for CSE elimination.
328 * N.B. This function is not recursive: only one level is reversed.
329 * N.B2. Although decoder returns lists in reverse order, we try
330 * to maintain as common representation the forward ordered list. */
331
332 /* Called by: zx_DEC_elem, zxenc_pubkey_enc, zxenc_symkey_enc, zxid_ac_desc, zxid_add_env_if_needed, zxid_add_fed_tok2epr, zxid_ar_desc, zxid_az_soap, zxid_contact_desc, zxid_idp_sso_desc, zxid_key_desc, zxid_key_info, zxid_mk_a7n, zxid_mk_authn_req, zxid_mk_az, zxid_mk_az_cd1, zxid_mk_di_req_svc, zxid_mk_mni, zxid_mk_saml_resp, zxid_mk_xac_az, zxid_mk_xacml_resp, zxid_mk_xacml_simple_at, zxid_mni_desc, zxid_nimap_desc, zxid_org_desc, zxid_slo_desc, zxid_sp_meta, zxid_sp_sso_desc, zxid_sso_desc, zxid_wsc_prep, zxid_wsf_decor x2, zxid_wsp_decorate, zxsig_sign x3 */
zx_reverse_elem_lists(struct zx_elem_s * x)333 void zx_reverse_elem_lists(struct zx_elem_s* x)
334 {
335 struct zx_elem_s* iternode;
336 struct zx_attr_s* in_at;
337 struct zx_attr_s* at_next;
338
339 iternode = x->kids;
340 REVERSE_LIST_NEXT(x->kids, iternode, g.n);
341
342 zx_chk_el_ord(x);
343
344 /* Insertion sort attribute list in alphabetical order 1st by NS URI, then by attribute name */
345
346 in_at = x->attr;
347 if (!in_at || !in_at->g.n)
348 return; /* Nothing to sort (no attributes or just one attribute) */
349
350 at_next = in_at; /* Start insertion sort by considering first to already be in place. */
351 in_at = (struct zx_attr_s*)in_at->g.n;
352 at_next->g.n = 0;
353
354 for (; in_at; in_at = at_next) {
355 at_next = (struct zx_attr_s*)in_at->g.n;
356 zx_ord_ins_at(x, in_at);
357 }
358 }
359
360 /*() Called from dec-templ.c for CSE elimination. */
361
362 /* Called by: zx_attr_lookup */
zx_dec_attr_val(struct zx_ctx * c,const char * func)363 static const char* zx_dec_attr_val(struct zx_ctx* c, const char* func)
364 {
365 const char* data;
366 char quote = '=';
367 ZX_LOOK_FOR(c,'=');
368
369 ++c->p;
370 if (c->p >= c->lim)
371 goto look_for_not_found;
372 if (!ONE_OF_2(*c->p, '"', '\'')) {
373 zx_xml_parse_err(c, *c->p, func, "zx_dec_attr_val: Did not find expected quote char (single or double), saw");
374 return 0;
375 }
376 quote = *c->p;
377 ++c->p;
378 if (c->p >= c->lim)
379 goto look_for_not_found;
380 data = c->p;
381 ZX_LOOK_FOR(c, quote);
382 return data;
383 look_for_not_found:
384 zx_xml_parse_err(c, quote, func, "zx_dec_attr_val: char not found");
385 return 0;
386 }
387
388 /*() Tokenize an attribute.
389 * Lookup functions to convert a namespace qualified string to an integer token.
390 * The att2tok() functions come from xsd2sg.pl code generation via gperf.
391 * Internal function CSE.
392 * Starts with c->p pointing to beginning of attribute (with ns prefix, if any) */
393
394 /* Called by: zx_DEC_elem */
zx_attr_lookup(struct zx_ctx * c,struct zx_elem_s * x)395 static int zx_attr_lookup(struct zx_ctx* c, struct zx_elem_s* x)
396 {
397 const char* prefix;
398 const char* name;
399 const char* lim;
400 const char* data;
401 const char* p;
402 struct zx_ns_s* ns;
403 const struct zx_at_tok* zt;
404 struct zx_attr_s* attr;
405
406 ZX_SKIP_WS(c, ZX_TOK_ATTR_ERR);
407 if (ONE_OF_2(*c->p, '>', '/'))
408 return ZX_TOK_NO_ATTR;
409
410 attr = ZX_ZALLOC(c, struct zx_attr_s);
411 attr->name = (char*)(name = c->p);
412 if (!(data = zx_dec_attr_val(c, "attr_lookup"))) {
413 ZX_FREE(c, attr);
414 return ZX_TOK_ATTR_ERR;
415 }
416 attr->name_len = data - 2 - attr->name;
417 attr->g.s = (char*)data;
418 attr->g.len = c->p - attr->g.s;
419 lim = attr->g.s - 2;
420 /*attr->g.tok = ZX_TOK_NS_NOT_FOUND; / * Start with unknown namespace. */
421
422 p = memchr(name, ':', lim-name); /* look for namespace prefix */
423 if (p) {
424 prefix = name;
425 name = p+1;
426 } else
427 prefix = 0;
428
429 /* Look for namespace declaration. Skip as these were prescanned (see above in this file). */
430 if (prefix) {
431 if ((name-1)-prefix == sizeof("xmlns")-1 && !memcmp("xmlns", prefix, sizeof("xmlns")-1)) {
432 ZX_FREE(c, attr);
433 zx_xmlns_detected(c, x, data);
434 return ZX_TOK_XMLNS;
435 }
436 ns = zx_prefix_seen_whine(c, (name-1)-prefix, prefix, "attr_lookup", 0);
437 if (ns) {
438 attr->ns = ns;
439 if (ns->master)
440 ns = ns->master;
441 if (ns > c->ns_tab && ns - c->ns_tab < c->n_ns) {
442 attr->g.tok = (ns - c->ns_tab) << ZX_TOK_NS_SHIFT;
443 } else {
444 INFO("Non-native prefix(%.*s) attr(%.*s) in elem(%.*s)", ((int)((name-1)-prefix)), prefix, attr->name_len, attr->name, x->g.len, x->g.s);
445 }
446 } else {
447 INFO("Undeclared (and unknown) prefix(%.*s) attr(%.*s) in elem(%.*s)", ((int)((name-1)-prefix)), prefix, attr->name_len, attr->name, x->g.len, x->g.s);
448 }
449 } else {
450 if (lim-name == sizeof("xmlns")-1 && !memcmp("xmlns", name, sizeof("xmlns")-1)) {
451 ZX_FREE(c, attr);
452 zx_xmlns_detected(c, x, data);
453 return ZX_TOK_XMLNS;
454 }
455 /* Most attributes are namespaceless (or have containing element's namespace). */
456 }
457
458 attr->g.n = &x->attr->g;
459 x->attr = attr;
460 zt = zx_attr2tok(name, lim-name);
461 if (zt)
462 return attr->g.tok |= (zt - zx_at_tab);
463 return attr->g.tok |= ZX_TOK_ATTR_NOT_FOUND;
464 }
465
466 /*() Given token, find element descriptor. */
467
468 /* Called by: zx_DEC_elem, zx_ENC_WO_any_elem, zx_LEN_WO_any_elem, zx_chk_el_ord, zx_el_lookup, zx_new_elem */
zx_el_desc_lookup(int tok)469 struct zx_el_desc* zx_el_desc_lookup(int tok)
470 {
471 struct zx_el_desc* ed;
472 if (tok == ZX_TOK_NOT_FOUND)
473 return 0;
474 if (!IN_RANGE(tok & ZX_TOK_TOK_MASK, 0, zx__ELEM_MAX)) {
475 ERR("out of range token 0x%06x", tok);
476 return 0;
477 }
478 for (ed = zx_el_tab[tok & ZX_TOK_TOK_MASK].n; ed; ed = ed->n)
479 if (ed->tok == tok)
480 return ed;
481 ERR("unknown token 0x%06x", tok);
482 return 0;
483 }
484
485 /*() Lookup function to convert a namespace qualified string to an integer token.
486 * First namespace is looked up and then the element in namespace specific hash.
487 * The hash functions come from xsd2sg.pl code generation via gperf. */
488
489 /* Called by: zx_DEC_elem */
zx_el_lookup(struct zx_ctx * c,struct zx_elem_s * x,struct zx_ns_s ** pop_seenp)490 static struct zx_elem_s* zx_el_lookup(struct zx_ctx* c, struct zx_elem_s* x, struct zx_ns_s** pop_seenp)
491 {
492 struct zx_elem_s* el;
493 struct zx_ns_s* ns;
494 struct zx_ns_s* master_ns;
495 const struct zx_el_tok* zt;
496 const struct zx_el_desc* ed;
497 const char* full_name;
498 const char* name;
499 const char* prefix;
500 const char* p;
501 int tok;
502
503 if (!(name = zx_scan_elem_start(c, (const char*)__FUNCTION__)))
504 return 0;
505
506 *pop_seenp = zx_scan_xmlns(c); /* Prescan namespaces so token can be correctly recognized. */
507
508 full_name = name;
509 p = memchr(name, ':', c->p-name); /* look for namespace prefix */
510 if (p) {
511 prefix = name;
512 name = p+1;
513 } else
514 prefix = 0;
515
516 ns = zx_prefix_seen_whine(c, prefix ? (name-1)-prefix : 0, prefix, (const char*)__FUNCTION__,1);
517 master_ns = ns->master?ns->master:ns;
518 zt = zx_elem2tok(name, c->p - name);
519 if (zt) {
520 tok = ((master_ns - c->ns_tab) << ZX_TOK_NS_SHIFT) | (zt - zx_el_tab);
521 ed = zx_el_desc_lookup(tok);
522 if (ed) {
523 el = ZX_ALLOC(c, ed->siz);
524 ZERO(el, ed->siz);
525 } else
526 goto unknown_el;
527 } else {
528 unknown_el:
529 // Unknown element warnings are quite frequent and just clutter the logs. Downgrade.
530 //INFO("Unknown element <%.*s>, child of <%.*s>", ((int)(c->p - full_name)), full_name, x->g.len, x->g.s);
531 D("Unknown element <%.*s>, child of <%.*s>", ((int)(c->p - full_name)), full_name, x->g.len, x->g.s);
532 el = ZX_ZALLOC(c, struct zx_elem_s);
533 tok = ZX_TOK_NOT_FOUND;
534 }
535
536 el->g.tok = tok;
537 el->ns = ns;
538 el->g.s = (char*)full_name;
539 el->g.len = c->p - full_name;
540 el->g.n = &x->kids->g;
541 x->kids = el;
542 return el;
543 }
544
545 /*() Element Decoder. When per element decoder is called, the c->p
546 * will point to just past the element name. The element has already
547 * been allocated to the correct size and the namespace prescan has
548 * already been done (except when called from zx_dec_zx_root()). */
549
550 /* Called by: zx_DEC_elem, zx_dec_zx_root */
zx_DEC_elem(struct zx_ctx * c,struct zx_elem_s * x)551 void zx_DEC_elem(struct zx_ctx* c, struct zx_elem_s* x)
552 {
553 int tok MAYBE_UNUSED; /* Unused in zx_DEC_root() */
554 struct zx_el_desc* ed = zx_el_desc_lookup(x->g.tok);
555 struct zx_elem_s* el;
556 struct zx_ns_s* pop_seen;
557
558 if (x->g.tok != zx_root_ELEM) {
559 /* The tag name has already been detected. Process attributes until '>' */
560
561 for (; c->p < c->lim; ++c->p) {
562 tok = zx_attr_lookup(c, x);
563 switch (tok) {
564 case ZX_TOK_XMLNS: break;
565 case ZX_TOK_ATTR_NOT_FOUND: break;
566 case ZX_TOK_ATTR_ERR: return;
567 case ZX_TOK_NO_ATTR: goto no_attr;
568 default:
569 if (!ed || !ed->at_dec(c, x)) { /* element specific attribute processing */
570 D("Known attribute(%.*s) tok=0x%x in wrong context(%.*s)", x->attr->name_len, x->attr->name, x->attr->g.tok, x->g.len, x->g.s);
571 }
572 }
573 }
574 no_attr:
575 if (c->p < c->lim) {
576 ++c->p;
577 if (c->p < c->lim && c->p[-1] == '/' && c->p[0] == '>') { /* <Tag/> without content */
578 ++c->p;
579 goto out;
580 }
581 }
582 }
583
584 /* Process contents until '</' or end of string nul */
585
586 while (1) {
587 next_elem:
588 if (c->p >= c->lim)
589 goto out;
590 /*ZX_SKIP_WS(c,x); DO NOT SQUASH WHITESPACE! EXC-CANON NEEDS IT. */
591 if (*c->p == '<') {
592 potential_tag:
593 ++c->p;
594 if (c->p >= c->lim)
595 goto out;
596 switch (*c->p) {
597 case '?': /* processing instruction <?xml ... ?> */
598 case '!': /* comment <!-- ... --> */
599 if (zx_scan_pi_or_comment(c))
600 break;
601 goto next_elem;
602 case '/': /* close tag */
603 if (!zx_scan_elem_end(c, ((struct zx_elem_s*)x)->g.s, (const char*)__FUNCTION__))
604 return;
605 /* Legitimate close tag. Normal exit from this function. */
606 ++c->p;
607 goto out;
608 default:
609 if (AZaz_(*c->p)) {
610 el = zx_el_lookup(c, (struct zx_elem_s*)x, &pop_seen);
611 if (!el)
612 return;
613 zx_DEC_elem(c, el); /* read the kid on syntactic level */
614 /* element specific subelement processing: assign the kid to correct struct field */
615 if (!ed || !ed->el_dec(c, x)) {
616 if (el->g.tok != ZX_TOK_NOT_FOUND) {
617 D("Known element(%.*s) tok=0x%x in wrong context(%.*s)", el->g.len, el->g.s, el->g.tok, x->g.len, x->g.s);
618 el->g.tok = ZX_TOK_NOT_FOUND;
619 }
620 }
621 zx_pop_seen(pop_seen);
622 if (c->top1 && x->g.tok == zx_root_ELEM) /* to stop parse after single <e:Envelope> */
623 goto out;
624 goto next_elem;
625 }
626 }
627 /* false alarm <, fall thru */
628 }
629 if (!zx_scan_data(c, (struct zx_elem_s*)x))
630 return;
631 goto potential_tag;
632 }
633 out:
634 zx_reverse_elem_lists((struct zx_elem_s*)x);
635 }
636
637 /*() Prepare a context for decoding XML. The decoding operation will not
638 * alter the underlying data (e.g. no nuls are inserted, not even temporarily).
639 * N.B. Often you would wrap this in locks, like
640 * LOCK(cf->ctx->mx, "valid");
641 * zx_prepare_dec_ctx(cf->ctx, zx_ns_tab, n_ns, ss->s, ss->s + ss->len);
642 * r = zx_DEC_elem(cf->ctx, &r->gg);
643 * UNLOCK(cf->ctx->mx, "valid");
644 * or just see zx_dec_zx_root()
645 */
646
647 /* Called by: covimp_test, zx_dec_zx_root */
zx_prepare_dec_ctx(struct zx_ctx * c,struct zx_ns_s * ns_tab,int n_ns,const char * start,const char * lim)648 void zx_prepare_dec_ctx(struct zx_ctx* c, struct zx_ns_s* ns_tab, int n_ns, const char* start, const char* lim)
649 {
650 zx_reset_ns_ctx(c);
651 c->ns_tab = ns_tab;
652 c->n_ns = n_ns;
653 c->bas = c->p = start;
654 c->lim = lim;
655 }
656
657 /*(i) Decode arbitary xml with zx_ns_tab set of namespaces and parsers.
658 * The resulting data structure has linked lists in *inverted* order,
659 * i.e. last tag is first element of the list. */
660
661 /* Called by: main x6, sig_validate, test_ibm_cert_problem, zxid_add_env_if_needed x2, zxid_dec_a7n, zxid_decode_redir_or_post, zxid_decrypt_nameid, zxid_decrypt_newnym, zxid_di_query, zxid_find_epr, zxid_gen_boots, zxid_get_ses_sso_a7n x2, zxid_idp_soap_parse, zxid_mk_sa_attribute_ss, zxid_mk_xacml_simple_at, zxid_parse_meta, zxid_print_session, zxid_reg_svc, zxid_soap_call_raw, zxid_sp_soap_parse, zxid_str2a7n, zxid_str2nid, zxid_str2token, zxid_wsp_validate */
zx_dec_zx_root(struct zx_ctx * c,int len,const char * start,const char * func)662 struct zx_root_s* zx_dec_zx_root(struct zx_ctx* c, int len, const char* start, const char* func)
663 {
664 struct zx_root_s* r = zx_NEW_root(c, 0);
665 LOCK(c->mx, func);
666 zx_prepare_dec_ctx(c, zx_ns_tab, sizeof(zx_ns_tab)/sizeof(struct zx_ns_s), start, start + len);
667 zx_DEC_elem(c, &r->gg);
668 UNLOCK(c->mx, func);
669 return r;
670 }
671
672 /* EOF -- zxlibdec.c */
673