1 /* packet-xml.c
 * Wireshark's XML dissector.
3 *
4 * (C) 2005, Luis E. Garcia Ontanon.
5 *
6 * Refer to the AUTHORS file or the AUTHORS section in the man page
7 * for contacting the author(s) of this file.
8 *
9 * Wireshark - Network traffic analyzer
10 * By Gerald Combs <gerald@wireshark.org>
11 * Copyright 1998 Gerald Combs
12 *
13 * SPDX-License-Identifier: GPL-2.0-or-later
14 */
15
16 #include "config.h"
17
18 #include <string.h>
19 #include <errno.h>
20
21 #include <epan/packet.h>
22 #include <epan/tvbparse.h>
23 #include <epan/dtd.h>
24 #include <epan/proto_data.h>
25 #include <wsutil/filesystem.h>
26 #include <epan/prefs.h>
27 #include <epan/expert.h>
28 #include <epan/garrayfix.h>
29 #include <wsutil/str_util.h>
30 #include <wsutil/report_message.h>
31
32 #include "packet-xml.h"
33 #include "packet-acdr.h"
34
/* Prototypes for this dissector's two non-static, argument-less entry points. */
void proto_register_xml(void);
void proto_reg_handoff_xml(void);
37
/* Context handed to add_xml_attribute_names() through its user-data pointer. */
struct _attr_reg_data {
    wmem_array_t *hf;       /* array that receives one header-field entry per attribute */
    const gchar *basename;  /* prefix joined with "." and the attribute name to build the field abbreviation */
};
42
43
/* Subtree indices: ett_dtd is used for DOCTYPE items, ett_xmpli for
 * processing instructions whose name has no registered namespace. */
static gint ett_dtd = -1;
static gint ett_xmpli = -1;

/* Header fields. hf_unknowwn_attrib is the fallback for attributes that are
 * not found in the current namespace; hf_comment, hf_dtd_tag and
 * hf_cdatasection double as token ids in the grammar built by
 * init_xml_parser(). */
static int hf_unknowwn_attrib = -1;
static int hf_comment = -1;
static int hf_xmlpi = -1;
static int hf_dtd_tag = -1;
static int hf_doctype = -1;
static int hf_cdatasection = -1;

/* Expert info items attached when a closing token has no matching opener
 * on the frame stack, or when text matches no grammar rule. */
static expert_field ei_xml_closing_unopened_tag = EI_INIT;
static expert_field ei_xml_closing_unopened_xmpli_tag = EI_INIT;
static expert_field ei_xml_unrecognized_text = EI_INIT;

/* dissector handles */
static dissector_handle_t xml_handle;

/* parser definitions */
static tvbparse_wanted_t *want;         /* full grammar, consumed token by token in dissect_xml() */
static tvbparse_wanted_t *want_ignore;  /* characters handed to tvbparse_init() as ignorable (whitespace) */
static tvbparse_wanted_t *want_heur;    /* reduced grammar peeked at by dissect_xml_heur() */

static wmem_map_t *xmpli_names;  /* processing-instruction name -> xml_ns_t */
static wmem_map_t *media_types;  /* looked up with pinfo->match_string to pick the root namespace */

/* xml_ns is the root namespace used when no media type matches; unknown_ns is
 * the fallback for tags that cannot be resolved; root_ns is (re)selected at
 * the start of every dissect_xml() call. */
static xml_ns_t xml_ns = {"xml", "/", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t unknown_ns = {"unknown", "?", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t *root_ns;

/* When TRUE, dissect_xml_heur() also tries the payload as 16-bit Unicode text. */
static gboolean pref_heuristic_unicode = FALSE;


/* Negative token ids, so they cannot be mistaken for a header-field id
 * (after_token() treats any id > 0 as a field id). */
#define XML_CDATA       -1000
#define XML_SCOPED_NAME -1001


/* Accumulators filled by add_xml_field() and by the namespace registration helpers. */
static wmem_array_t *hf_arr;
static GArray *ett_arr;
82
83 static const gchar *default_media_types[] = {
84 "text/xml",
85 "text/vnd.wap.wml",
86 "text/vnd.wap.si",
87 "text/vnd.wap.sl",
88 "text/vnd.wap.co",
89 "text/vnd.wap.emn",
90 "application/3gpp-ims+xml",
91 "application/atom+xml",
92 "application/auth-policy+xml",
93 "application/ccmp+xml",
94 "application/conference-info+xml", /*RFC4575*/
95 "application/cpim-pidf+xml",
96 "application/cpl+xml",
97 "application/dds-web+xml",
98 "application/im-iscomposing+xml", /*RFC3994*/
99 "application/load-control+xml", /*RFC7200*/
100 "application/mathml+xml",
101 "application/media_control+xml",
102 "application/note+xml",
103 "application/pidf+xml",
104 "application/pidf-diff+xml",
105 "application/poc-settings+xml",
106 "application/rdf+xml",
107 "application/reginfo+xml",
108 "application/resource-lists+xml",
109 "application/rlmi+xml",
110 "application/rls-services+xml",
111 "application/rss+xml",
112 "application/rs-metadata+xml",
113 "application/smil",
114 "application/simple-filter+xml",
115 "application/simple-message-summary+xml", /*RFC3842*/
116 "application/simservs+xml",
117 "application/soap+xml",
118 "application/vnd.etsi.aoc+xml",
119 "application/vnd.etsi.cug+xml",
120 "application/vnd.etsi.iptvcommand+xml",
121 "application/vnd.etsi.iptvdiscovery+xml",
122 "application/vnd.etsi.iptvprofile+xml",
123 "application/vnd.etsi.iptvsad-bc+xml",
124 "application/vnd.etsi.iptvsad-cod+xml",
125 "application/vnd.etsi.iptvsad-npvr+xml",
126 "application/vnd.etsi.iptvservice+xml",
127 "application/vnd.etsi.iptvsync+xml",
128 "application/vnd.etsi.iptvueprofile+xml",
129 "application/vnd.etsi.mcid+xml",
130 "application/vnd.etsi.overload-control-policy-dataset+xml",
131 "application/vnd.etsi.pstn+xml",
132 "application/vnd.etsi.sci+xml",
133 "application/vnd.etsi.simservs+xml",
134 "application/vnd.etsi.tsl+xml",
135 "application/vnd.oma.xdm-apd+xml",
136 "application/vnd.oma.fnl+xml",
137 "application/vnd.oma.access-permissions-list+xml",
138 "application/vnd.oma.alias-principals-list+xml",
139 "application/upp-directory+xml", /*OMA-ERELD-XDM-V2_2_1-20170124-A*/
140 "application/vnd.oma.xdm-hi+xml",
141 "application/vnd.oma.xdm-rhi+xml",
142 "application/vnd.oma.xdm-prefs+xml",
143 "application/vnd.oma.xdcp+xml",
144 "application/vnd.oma.bcast.associated-procedure-parameter+xml",
145 "application/vnd.oma.bcast.drm-trigger+xml",
146 "application/vnd.oma.bcast.imd+xml",
147 "application/vnd.oma.bcast.notification+xml",
148 "application/vnd.oma.bcast.sgdd+xml",
149 "application/vnd.oma.bcast.smartcard-trigger+xml",
150 "application/vnd.oma.bcast.sprov+xml",
151 "application/vnd.oma.cab-address-book+xml",
152 "application/vnd.oma.cab-feature-handler+xml",
153 "application/vnd.oma.cab-pcc+xml",
154 "application/vnd.oma.cab-subs-invite+xml",
155 "application/vnd.oma.cab-user-prefs+xml",
156 "application/vnd.oma.dd2+xml",
157 "application/vnd.oma.drm.risd+xml",
158 "application/vnd.oma.group-usage-list+xml",
159 "application/vnd.oma.pal+xml",
160 "application/vnd.oma.poc.detailed-progress-report+xml",
161 "application/vnd.oma.poc.final-report+xml",
162 "application/vnd.oma.poc.groups+xml",
163 "application/vnd.oma.poc.invocation-descriptor+xml",
164 "application/vnd.oma.poc.optimized-progress-report+xml",
165 "application/vnd.oma.scidm.messages+xml",
166 "application/vnd.oma.suppnot+xml", /*OMA-ERELD-Presence_SIMPLE-V2_0-20120710-A*/
167 "application/vnd.oma.xcap-directory+xml",
168 "application/vnd.omads-email+xml",
169 "application/vnd.omads-file+xml",
170 "application/vnd.omads-folder+xml",
171 "application/vnd.3gpp.access-transfer-events+xml",
172 "application/vnd.3gpp.bsf+xml",
173 "application/vnd.3gpp.comm-div-info+xml", /*3GPP TS 24.504 version 8.19.0*/
174 "application/vnd.3gpp.cw+xml",
175 "application/vnd.3gpp.iut+xml", /*3GPP TS 24.337*/
176 "application/vnc.3gpp.iut-config+xml", /*3GPP TS 24.337*/
177 "application/vnd.3gpp.mcptt-info+xml",
178 "application/vnd.3gpp.mid-call+xml",
179 "application/vnd.3gpp-prose-pc3ch+xml",
180 "application/vnd.3gpp-prose+xml",
181 "application/vnd.3gpp.replication+xml", /*3GPP TS 24.337*/
182 "application/vnd.3gpp.sms+xml",
183 "application/vnd.3gpp.srvcc-info+xml",
184 "application/vnd.3gpp.srvcc-ext+xml",
185 "application/vnd.3gpp.state-and-event-info+xml",
186 "application/vnd.3gpp.ussd+xml",
187 "application/vnd.3gpp2.bcmcsinfo+xml",
188 "application/vnd.wv.csp+xml",
189 "application/vnd.wv.csp.xml",
190 "application/watcherinfo+xml",
191 "application/xcap-att+xml",
192 "application/xcap-caps+xml",
193 "application/xcap-diff+xml",
194 "application/xcap-el+xml",
195 "application/xcap-error+xml",
196 "application/xcap-ns+xml",
197 "application/xml",
198 "application/xml-dtd",
199 "application/xpidf+xml",
200 "application/xslt+xml",
201 "application/x-crd+xml",
202 "application/x-wms-logconnectstats",
203 "application/x-wms-logplaystats",
204 "application/x-wms-sendevent",
205 "image/svg+xml",
206 "message/imdn+xml", /*RFC5438*/
207 };
208
/*
 * Clear every tree link of new_child, then hang it under parent as the
 * last child.  A NULL parent means new_child is a root frame: its links
 * are reset and nothing else is touched.
 */
static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child)
{
    xml_frame_t *tail;

    new_child->parent       = parent;
    new_child->first_child  = NULL;
    new_child->last_child   = NULL;
    new_child->prev_sibling = NULL;
    new_child->next_sibling = NULL;

    if (!parent) {
        return; /* root */
    }

    if (parent->first_child != NULL) {
        /* append after the current last child */
        tail = parent->last_child;
        tail->next_sibling = new_child;
        new_child->prev_sibling = tail;
    } else {
        /* parent had no children yet */
        parent->first_child = new_child;
    }
    parent->last_child = new_child;
}
227
228 static int
dissect_xml(tvbuff_t * tvb,packet_info * pinfo,proto_tree * tree,void * data _U_)229 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data _U_)
230 {
231 tvbparse_t *tt;
232 static GPtrArray *stack;
233 xml_frame_t *current_frame;
234 const char *colinfo_str;
235 tvbuff_t *decoded;
236 guint16 try_bom;
237
238 if (stack != NULL)
239 g_ptr_array_free(stack, TRUE);
240
241 stack = g_ptr_array_new();
242 current_frame = wmem_new(wmem_packet_scope(), xml_frame_t);
243 current_frame->type = XML_FRAME_ROOT;
244 current_frame->name = NULL;
245 current_frame->name_orig_case = NULL;
246 current_frame->value = NULL;
247 current_frame->pinfo = pinfo;
248 insert_xml_frame(NULL, current_frame);
249 g_ptr_array_add(stack, current_frame);
250
251 /* Detect and act on possible byte-order mark (BOM) */
252 try_bom = tvb_get_ntohs(tvb, 0);
253 if (try_bom == 0xFEFF) {
254 /* UTF-16BE */
255 const guint8 *data_str = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), ENC_UTF_16|ENC_BIG_ENDIAN);
256 size_t l = strlen(data_str);
257 decoded = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
258 add_new_data_source(pinfo, decoded, "Decoded UTF-16BE text");
259 }
260 else if(try_bom == 0xFFFE) {
261 /* UTF-16LE (or possibly UTF-32LE, but Wireshark doesn't support UTF-32) */
262 const guint8 *data_str = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), ENC_UTF_16|ENC_LITTLE_ENDIAN);
263 size_t l = strlen(data_str);
264 decoded = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
265 add_new_data_source(pinfo, decoded, "Decoded UTF-16LE text");
266 }
267 /* Could also test if try_bom is 0xnn00 or 0x00nn to guess endianness if we wanted */
268 else {
269 /* Assume it's UTF-8, either with or without BOM */
270 decoded = tvb;
271 }
272
273 tt = tvbparse_init(pinfo->pool, decoded, 0, -1, stack, want_ignore);
274 current_frame->start_offset = 0;
275 current_frame->length = tvb_captured_length(decoded);
276
277 root_ns = NULL;
278
279 if (pinfo->match_string)
280 root_ns = (xml_ns_t *)wmem_map_lookup(media_types, pinfo->match_string);
281
282 if (! root_ns ) {
283 root_ns = &xml_ns;
284 colinfo_str = "/XML";
285 } else {
286 char *colinfo_str_buf;
287 colinfo_str_buf = wmem_strconcat(wmem_packet_scope(), "/", root_ns->name, NULL);
288 ascii_strup_inplace(colinfo_str_buf);
289 colinfo_str = colinfo_str_buf;
290 }
291
292 col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
293
294 current_frame->ns = root_ns;
295
296 current_frame->item = proto_tree_add_item(tree, current_frame->ns->hf_tag, decoded, 0, -1, ENC_UTF_8|ENC_NA);
297 current_frame->tree = proto_item_add_subtree(current_frame->item, current_frame->ns->ett);
298 current_frame->last_item = current_frame->item;
299
300 while(tvbparse_get(tt, want)) ;
301
302 /* Save XML structure in case it is useful for the caller (only XMPP for now) */
303 p_add_proto_data(pinfo->pool, pinfo, xml_ns.hf_tag, 0, current_frame);
304
305 return tvb_captured_length(tvb);
306 }
307
dissect_xml_heur(tvbuff_t * tvb,packet_info * pinfo,proto_tree * tree,void * data)308 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
309 {
310 if (tvbparse_peek(tvbparse_init(pinfo->pool, tvb, 0, -1, NULL, want_ignore), want_heur)) {
311 dissect_xml(tvb, pinfo, tree, data);
312 return TRUE;
313 } else if (pref_heuristic_unicode) {
314 const guint8 *data_str;
315 tvbuff_t *unicode_tvb;
316 guint16 try_bom;
317 /* XXX - UCS-2, or UTF-16? */
318 gint enc = ENC_UCS_2|ENC_LITTLE_ENDIAN;
319 size_t l;
320
321 try_bom = tvb_get_ntohs(tvb, 0);
322 if (try_bom == 0xFEFF) {
323 enc = ENC_UTF_16|ENC_BIG_ENDIAN;
324 }
325 else if(try_bom == 0xFFFE) {
326 enc = ENC_UTF_16|ENC_LITTLE_ENDIAN;
327 }
328
329 data_str = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), enc);
330 l = strlen(data_str);
331 unicode_tvb = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
332 if (tvbparse_peek(tvbparse_init(pinfo->pool, unicode_tvb, 0, -1, NULL, want_ignore), want_heur)) {
333 add_new_data_source(pinfo, unicode_tvb, "UTF8");
334 dissect_xml(unicode_tvb, pinfo, tree, data);
335 return TRUE;
336 }
337 }
338 return FALSE;
339 }
340
/*
 * Return the first direct child of frame that is a tag and, when name is
 * non-NULL, whose original-case name equals name.  A NULL name selects the
 * first tag child whatever it is called.  Returns NULL when nothing matches.
 */
xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name)
{
    xml_frame_t *child;

    for (child = frame->first_child; child != NULL; child = child->next_sibling) {
        if (child->type != XML_FRAME_TAG) {
            continue;
        }
        if (name == NULL) {
            return child; /* get the 1st tag */
        }
        if (child->name_orig_case != NULL && strcmp(child->name_orig_case, name) == 0) {
            return child;
        }
    }

    return NULL;
}
361
/*
 * Return the first direct child of frame that is an attribute whose
 * original-case name equals name, or NULL when there is none.
 */
xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name)
{
    xml_frame_t *child;

    for (child = frame->first_child; child != NULL; child = child->next_sibling) {
        if (child->type != XML_FRAME_ATTRIB) {
            continue;
        }
        if (child->name_orig_case != NULL && strcmp(child->name_orig_case, name) == 0) {
            return child;
        }
    }

    return NULL;
}
378
/*
 * Return the first direct child of frame that is a character-data frame,
 * or NULL when frame has no such child.
 */
xml_frame_t *xml_get_cdata(xml_frame_t *frame)
{
    xml_frame_t *child = frame->first_child;

    while (child != NULL && child->type != XML_FRAME_CDATA) {
        child = child->next_sibling;
    }

    return child;
}
394
/*
 * Grammar callback for self-contained tokens (comments, CDATA sections,
 * DTD tags and plain text).
 *
 * Adds the token to the tree of the frame on top of the stack, labelled
 * with its formatted text.  Plain text (token id XML_CDATA) is also
 * recorded as a CDATA child frame of that frame.
 */
static void after_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray      *frames    = (GPtrArray *)tvbparse_data;
    xml_frame_t    *top       = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    const gboolean  text_node = (tok->id == XML_CDATA);
    int             field;
    proto_item     *item;
    xml_frame_t    *cdata_frame;

    /* Pick the header field: namespace cdata field for text, the token id
     * itself when it is a field id, the generic cdata field otherwise. */
    if (text_node) {
        field = top->ns ? top->ns->hf_cdata : xml_ns.hf_cdata;
    } else if (tok->id > 0) {
        field = tok->id;
    } else {
        field = xml_ns.hf_cdata;
    }

    item = proto_tree_add_item(top->tree, field, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);

    proto_item_set_text(item, "%s",
                        tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    if (!text_node) {
        return;
    }

    cdata_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    cdata_frame->type           = XML_FRAME_CDATA;
    cdata_frame->name           = NULL;
    cdata_frame->name_orig_case = NULL;
    cdata_frame->value          = tvb_new_subset_length(tok->tvb, tok->offset, tok->len);
    insert_xml_frame(top, cdata_frame);
    cdata_frame->item           = item;
    cdata_frame->last_item      = item;
    cdata_frame->tree           = NULL;
    cdata_frame->start_offset   = tok->offset;
    cdata_frame->length         = tok->len;
    cdata_frame->ns             = NULL;
    cdata_frame->pinfo          = top->pinfo;
}
434
/*
 * Grammar callback run for a processing instruction ("<?name ... ?>").
 *
 * Adds an item for the instruction under the current frame, labelled with
 * the text up to and including the name, and pushes a new XMPLI frame so
 * that the attributes which follow are attached to it.  The namespace is
 * looked up in xmpli_names by the name exactly as it appears in the
 * document; unknown names fall back to the generic hf_xmlpi/ett_xmpli.
 *
 * The frame's `name` is lower-cased, while `name_orig_case` keeps the
 * spelling found in the document, as before_tag() and after_attrib() do.
 */
static void before_xmpli(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t     *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);
    proto_item      *pi;
    proto_tree      *pt;
    tvbparse_elem_t *name_tok      = tok->sub->next;
    gchar           *name          = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
    xml_ns_t        *ns            = (xml_ns_t *)wmem_map_lookup(xmpli_names, name);
    gchar           *name_orig_case;
    xml_frame_t     *new_frame;

    int  hf_tag;
    gint ett;

    /* Copy before lower-casing in place, otherwise the original case is lost */
    name_orig_case = wmem_strdup(wmem_packet_scope(), name);
    ascii_strdown_inplace(name);

    if (!ns) {
        hf_tag = hf_xmlpi;
        ett    = ett_xmpli;
    } else {
        hf_tag = ns->hf_tag;
        ett    = ns->ett;
    }

    pi = proto_tree_add_item(current_frame->tree, hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);

    proto_item_set_text(pi, "%s", tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, (name_tok->offset - tok->offset) + name_tok->len));

    pt = proto_item_add_subtree(pi, ett);

    new_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    new_frame->type           = XML_FRAME_XMPLI;
    new_frame->name           = name;
    new_frame->name_orig_case = name_orig_case;
    new_frame->value          = NULL;
    insert_xml_frame(current_frame, new_frame);
    new_frame->item           = pi;
    new_frame->last_item      = pi;
    new_frame->tree           = pt;
    new_frame->start_offset   = tok->offset;
    new_frame->length         = tok->len;
    new_frame->ns             = ns;
    new_frame->pinfo          = current_frame->pinfo;

    g_ptr_array_add(stack, new_frame);
}
481
/*
 * Grammar callback for the "?>" that ends a processing instruction: shows
 * the terminator in the current frame's tree and pops that frame.  If only
 * the root frame is on the stack an expert item is added instead of popping.
 */
static void after_xmlpi(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    guint        top_ix = frames->len - 1;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, top_ix);

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        /* nothing but the root left: this terminator closes nothing */
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_xmpli_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, top_ix);
}
496
/*
 * Grammar callback run for an opening tag ("<name" or "<root:name").
 *
 * Resolves the tag's namespace, adds an item labelled with the text up to
 * and including the tag name under the current frame, and pushes a new TAG
 * frame so that attributes and children attach to it.
 *
 * Namespace resolution:
 *  - scoped name: the part before ':' is looked up in xml_ns.elements and
 *    the part after it in that namespace's elements (names used as-is);
 *  - plain name: the lower-cased name is looked up in the current frame's
 *    namespace, then in root_ns;
 *  - anything unresolved gets unknown_ns.
 */
static void before_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames   = (GPtrArray *)tvbparse_data;
    xml_frame_t     *parent   = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    tvbparse_elem_t *name_tok = tok->sub->next;
    gchar           *tag_name;
    gchar           *tag_name_orig_case;
    xml_ns_t        *ns       = &unknown_ns;   /* until proven otherwise */
    xml_ns_t        *found;
    xml_frame_t     *tag_frame;
    proto_item      *item;
    proto_tree      *subtree;
    gint             label_len;

    if (name_tok->sub->id != XML_SCOPED_NAME) {
        tag_name = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
        tag_name_orig_case = wmem_strdup(wmem_packet_scope(), tag_name);
        ascii_strdown_inplace(tag_name);

        if (parent->ns) {
            found = (xml_ns_t *)wmem_map_lookup(parent->ns->elements, tag_name);
            if (!found) {
                found = (xml_ns_t *)wmem_map_lookup(root_ns->elements, tag_name);
            }
            if (found) {
                ns = found;
            }
        }
    } else {
        tvbparse_elem_t *root_tok = name_tok->sub->sub;
        tvbparse_elem_t *leaf_tok = name_tok->sub->sub->next->next;
        gchar           *scope_name;
        xml_ns_t        *scope_ns;

        scope_name = (gchar *)tvb_get_string_enc(wmem_packet_scope(), root_tok->tvb, root_tok->offset, root_tok->len, ENC_ASCII);
        tag_name   = (gchar *)tvb_get_string_enc(wmem_packet_scope(), leaf_tok->tvb, leaf_tok->offset, leaf_tok->len, ENC_ASCII);
        tag_name_orig_case = tag_name;

        scope_ns = (xml_ns_t *)wmem_map_lookup(xml_ns.elements, scope_name);
        if (scope_ns) {
            found = (xml_ns_t *)wmem_map_lookup(scope_ns->elements, tag_name);
            if (found) {
                ns = found;
            }
        }
    }

    item = proto_tree_add_item(parent->tree, ns->hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);

    /* Label: from the '<' through the end of the tag name */
    label_len = (name_tok->offset - tok->offset) + name_tok->len;
    proto_item_set_text(item, "%s", tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, label_len));

    subtree = proto_item_add_subtree(item, ns->ett);

    tag_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    tag_frame->type           = XML_FRAME_TAG;
    tag_frame->name           = tag_name;
    tag_frame->name_orig_case = tag_name_orig_case;
    tag_frame->value          = NULL;
    insert_xml_frame(parent, tag_frame);
    tag_frame->item           = item;
    tag_frame->last_item      = item;
    tag_frame->tree           = subtree;
    tag_frame->start_offset   = tok->offset;
    tag_frame->length         = tok->len;
    tag_frame->ns             = ns;
    tag_frame->pinfo          = parent->pinfo;

    g_ptr_array_add(frames, tag_frame);
}
571
/*
 * Grammar callback for the '>' that ends an opening tag: appends ">" to
 * the label of the last item recorded on the current frame.  The frame
 * stays on the stack.
 */
static void after_open_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok _U_)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);

    proto_item_append_text(top->last_item, ">");
}
579
/*
 * Grammar callback for the "/>" that ends a self-closing tag: appends "/>"
 * to the label of the current frame's last item and pops the frame.  If
 * only the root frame is on the stack an expert item is added instead.
 */
static void after_closed_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    guint        top_ix = frames->len - 1;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, top_ix);

    proto_item_append_text(top->last_item, "/>");

    if (frames->len <= 1) {
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, top_ix);
}
594
/*
 * Grammar callback for a closing tag ("</name>").
 *
 * Stretches the current frame (both its tree item and its recorded length)
 * so that it ends at the end of the closing tag, shows the closing tag in
 * the frame's tree and pops the frame.  If only the root frame is on the
 * stack an expert item is added instead of popping.
 */
static void after_untag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    guint        top_ix = frames->len - 1;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, top_ix);

    proto_item_set_len(top->item, (tok->offset - top->start_offset) + tok->len);
    top->length = (tok->offset - top->start_offset) + tok->len;

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, top_ix);
}
612
/*
 * Grammar callback run for the start of a "<!DOCTYPE name ..." declaration.
 *
 * Adds an hf_doctype item covering the document-type name (labelled with
 * the whole matched token) under the current frame and pushes a new
 * DTD_DOCTYPE frame named after the document type.
 */
static void before_dtd_doctype(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames   = (GPtrArray *)tvbparse_data;
    xml_frame_t     *parent   = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    /* '<', '!', "DOCTYPE", then the matched alternative whose first element is the name */
    tvbparse_elem_t *name_tok = tok->sub->next->next->next->sub->sub;
    xml_frame_t     *doctype;
    proto_item      *item;

    item = proto_tree_add_item(parent->tree, hf_doctype,
                               name_tok->tvb, name_tok->offset,
                               name_tok->len, ENC_ASCII|ENC_NA);

    proto_item_set_text(item, "%s", tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    doctype                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    doctype->type           = XML_FRAME_DTD_DOCTYPE;
    doctype->name           = (gchar *)tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb,
                                                          name_tok->offset,
                                                          name_tok->len, ENC_ASCII);
    doctype->name_orig_case = doctype->name;
    doctype->value          = NULL;
    insert_xml_frame(parent, doctype);
    doctype->item           = item;
    doctype->last_item      = item;
    doctype->tree           = proto_item_add_subtree(item, ett_dtd);
    doctype->start_offset   = tok->offset;
    doctype->length         = tok->len;
    doctype->ns             = NULL;
    doctype->pinfo          = parent->pinfo;

    g_ptr_array_add(frames, doctype);
}
643
/*
 * Grammar callback that drops the innermost frame from the stack.
 *
 * If only the root frame is left, nothing is popped and an expert item
 * covering tok is added to the root's tree instead.
 *
 * tok is used on that error path, so it must not carry the _U_ (unused)
 * annotation.
 */
static void pop_stack(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);

    if (stack->len > 1) {
        g_ptr_array_remove_index_fast(stack, stack->len - 1);
    } else {
        proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
    }
}
656
/*
 * Grammar callback for the "]>" token: shows it in the current frame's
 * tree and pops that frame.  If only the root frame is on the stack an
 * expert item is added instead of popping.
 */
static void after_dtd_close(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    guint        top_ix = frames->len - 1;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, top_ix);

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, top_ix);
}
670
/*
 * Grammar callback on the attribute-value rule: stores the token's first
 * sub-element in tok->data, where after_attrib() picks it up as the value.
 */
static void get_attrib_value(void *tvbparse_data _U_, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    tvbparse_elem_t *matched = tok->sub;

    tok->data = matched;
}
675
/*
 * Grammar callback run after a complete name=value attribute.
 *
 * When the lower-cased attribute name is registered in the current frame's
 * namespace, the item uses that attribute's field and covers only the
 * value; otherwise it uses hf_unknowwn_attrib and covers the whole
 * attribute.  Either way the item is labelled with the full attribute text,
 * becomes the current frame's last_item, and an ATTRIB child frame holding
 * the value is added to the current frame.
 */
static void after_attrib(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames     = (GPtrArray *)tvbparse_data;
    xml_frame_t     *owner      = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    /* third element of the sequence is the value rule; its data was set by get_attrib_value() */
    tvbparse_elem_t *value_part = (tvbparse_elem_t *)tok->sub->next->next->data;
    tvbparse_elem_t *shown      = tok;
    int              field      = hf_unknowwn_attrib;
    int             *known_field;
    gchar           *attr_name;
    gchar           *attr_name_orig_case;
    proto_item      *item;
    xml_frame_t     *attr_frame;

    attr_name           = tvb_get_string_enc(wmem_packet_scope(), tok->sub->tvb, tok->sub->offset, tok->sub->len, ENC_ASCII);
    attr_name_orig_case = wmem_strdup(wmem_packet_scope(), attr_name);
    ascii_strdown_inplace(attr_name);

    if (owner->ns) {
        known_field = (int *)wmem_map_lookup(owner->ns->attributes, attr_name);
        if (known_field) {
            field = *known_field;
            shown = value_part;
        }
    }

    item = proto_tree_add_item(owner->tree, field, shown->tvb, shown->offset, shown->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(item, "%s", tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    owner->last_item = item;

    attr_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    attr_frame->type           = XML_FRAME_ATTRIB;
    attr_frame->name           = attr_name;
    attr_frame->name_orig_case = attr_name_orig_case;
    attr_frame->value          = tvb_new_subset_length(value_part->tvb, value_part->offset,
                                                       value_part->len);
    insert_xml_frame(owner, attr_frame);
    attr_frame->item           = item;
    attr_frame->last_item      = item;
    attr_frame->tree           = NULL;
    attr_frame->start_offset   = tok->offset;
    attr_frame->length         = tok->len;
    attr_frame->ns             = NULL;
    attr_frame->pinfo          = owner->pinfo;
}
721
/*
 * Grammar callback for text that matched none of the XML rules: adds an
 * ei_xml_unrecognized_text expert item covering tok to the current
 * frame's tree.
 *
 * tok is used here, so it must not carry the _U_ (unused) annotation.
 */
static void unrecognized_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);

    proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_unrecognized_text,
                          tok->tvb, tok->offset, tok->len);
}
731
732
733
/*
 * Build the tvbparse grammar and store it in the file-scope `want`,
 * `want_ignore` and `want_heur`.
 *
 * Each rule is a tvbparse "wanted" element; the callbacks attached to the
 * rules (before_tag, after_attrib, ...) are what actually build the
 * protocol tree and the frame stack.
 *
 * NOTE(review): hf_comment, hf_cdatasection, hf_xmlpi and hf_dtd_tag are
 * read here by value as rule ids, so this presumably has to run after the
 * header fields have been registered - confirm against the caller.
 */
static void init_xml_parser(void)
{
    /* A name: letters, digits, '.', '-' and '_' */
    tvbparse_wanted_t *want_name =
        tvbparse_chars(-1, 1, 0,
                       "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
                       NULL, NULL, NULL);
    /* An attribute name: same as a name but ':' is allowed too */
    tvbparse_wanted_t *want_attr_name =
        tvbparse_chars(-1, 1, 0,
                       "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",
                       NULL, NULL, NULL);

    /* name ':' name - the id XML_SCOPED_NAME is what before_tag() tests for */
    tvbparse_wanted_t *want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
                                                           want_name,
                                                           tvbparse_char(-1, ":", NULL, NULL, NULL),
                                                           want_name,
                                                           NULL);

    /* A tag name is a scoped name or a plain name; the scoped form is listed first */
    tvbparse_wanted_t *want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
                                                          want_scoped_name,
                                                          want_name,
                                                          NULL);

    /* An attribute value: double- or single-quoted string, digits, or a bare
     * name.  get_attrib_value() publishes the matched alternative in tok->data. */
    tvbparse_wanted_t *want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
                                                              tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\"', '\\'),
                                                              tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\'', '\\'),
                                                              tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
                                                              want_name,
                                                              NULL);

    /* One or more name=value pairs, each handled by after_attrib() */
    tvbparse_wanted_t *want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
                                                              tvbparse_set_seq(-1, NULL, NULL, after_attrib,
                                                                               want_attr_name,
                                                                               tvbparse_char(-1, "=", NULL, NULL, NULL),
                                                                               want_attrib_value,
                                                                               NULL));

    /* End of an opening tag: '>' keeps the frame open, "/>" closes it */
    tvbparse_wanted_t *want_stoptag = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                         tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
                                                         tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
                                                         NULL);

    /* End of a processing instruction */
    tvbparse_wanted_t *want_stopxmlpi = tvbparse_string(-1, "?>", NULL, NULL, after_xmlpi);

    /* <!-- ... --> */
    tvbparse_wanted_t *want_comment = tvbparse_set_seq(hf_comment, NULL, NULL, after_token,
                                                       tvbparse_string(-1, "<!--", NULL, NULL, NULL),
                                                       tvbparse_until(-1, NULL, NULL, NULL,
                                                                      tvbparse_string(-1, "-->", NULL, NULL, NULL),
                                                                      TP_UNTIL_INCLUDE),
                                                       NULL);

    /* <![CDATA[ ... ]]> */
    tvbparse_wanted_t *want_cdatasection = tvbparse_set_seq(hf_cdatasection, NULL, NULL, after_token,
                                                            tvbparse_string(-1, "<![CDATA[", NULL, NULL, NULL),
                                                            tvbparse_until(-1, NULL, NULL, NULL,
                                                                           tvbparse_string(-1, "]]>", NULL, NULL, NULL),
                                                                           TP_UNTIL_INCLUDE),
                                                            NULL);

    /* <?name ?> or <?name attributes ?> */
    tvbparse_wanted_t *want_xmlpi = tvbparse_set_seq(hf_xmlpi, NULL, before_xmpli, NULL,
                                                     tvbparse_string(-1, "<?", NULL, NULL, NULL),
                                                     want_name,
                                                     tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                                        want_stopxmlpi,
                                                                        tvbparse_set_seq(-1, NULL, NULL, NULL,
                                                                                         want_attributes,
                                                                                         want_stopxmlpi,
                                                                                         NULL),
                                                                        NULL),
                                                     NULL);

    /* </name> */
    tvbparse_wanted_t *want_closing_tag = tvbparse_set_seq(0, NULL, NULL, after_untag,
                                                           tvbparse_char(-1, "<", NULL, NULL, NULL),
                                                           tvbparse_char(-1, "/", NULL, NULL, NULL),
                                                           want_tag_name,
                                                           tvbparse_char(-1, ">", NULL, NULL, NULL),
                                                           NULL);

    /* <!DOCTYPE name [        - leaves the DOCTYPE frame open, or
     * <!DOCTYPE name PUBLIC|SYSTEM ... >  - pop_stack() closes the frame that
     * before_dtd_doctype() pushed.
     * before_dtd_doctype() relies on this exact element layout to find the name. */
    tvbparse_wanted_t *want_doctype_start = tvbparse_set_seq(-1, NULL, before_dtd_doctype, NULL,
                                                             tvbparse_char(-1, "<", NULL, NULL, NULL),
                                                             tvbparse_char(-1, "!", NULL, NULL, NULL),
                                                             tvbparse_casestring(-1, "DOCTYPE", NULL, NULL, NULL),
                                                             tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                                                tvbparse_set_seq(-1, NULL, NULL, NULL,
                                                                                                 want_name,
                                                                                                 tvbparse_char(-1, "[", NULL, NULL, NULL),
                                                                                                 NULL),
                                                                                tvbparse_set_seq(-1, NULL, NULL, pop_stack,
                                                                                                 want_name,
                                                                                                 tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                                                                                    tvbparse_casestring(-1, "PUBLIC", NULL, NULL, NULL),
                                                                                                                    tvbparse_casestring(-1, "SYSTEM", NULL, NULL, NULL),
                                                                                                                    NULL),
                                                                                                 tvbparse_until(-1, NULL, NULL, NULL,
                                                                                                                tvbparse_char(-1, ">", NULL, NULL, NULL),
                                                                                                                TP_UNTIL_INCLUDE),
                                                                                                 NULL),
                                                                                NULL),
                                                             NULL);

    /* Any other <! ... > declaration */
    tvbparse_wanted_t *want_dtd_tag = tvbparse_set_seq(hf_dtd_tag, NULL, NULL, after_token,
                                                       tvbparse_char(-1, "<", NULL, NULL, NULL),
                                                       tvbparse_char(-1, "!", NULL, NULL, NULL),
                                                       tvbparse_until(-1, NULL, NULL, NULL,
                                                                      tvbparse_char(-1, ">", NULL, NULL, NULL),
                                                                      TP_UNTIL_INCLUDE),
                                                       NULL);

    /* <name ...> or <name .../>, with or without attributes */
    tvbparse_wanted_t *want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
                                                   tvbparse_char(-1, "<", NULL, NULL, NULL),
                                                   want_tag_name,
                                                   tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                                      tvbparse_set_seq(-1, NULL, NULL, NULL,
                                                                                       want_attributes,
                                                                                       want_stoptag,
                                                                                       NULL),
                                                                      want_stoptag,
                                                                      NULL),
                                                   NULL);

    /* "]>" closing a DOCTYPE internal subset */
    tvbparse_wanted_t *want_dtd_close = tvbparse_set_seq(-1, NULL, NULL, after_dtd_close,
                                                         tvbparse_char(-1, "]", NULL, NULL, NULL),
                                                         tvbparse_char(-1, ">", NULL, NULL, NULL),
                                                         NULL);

    /* Whitespace, passed to tvbparse_init() as the ignorable element */
    want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL);


    /* The full grammar.
     * NOTE(review): the alternatives are presumably tried in the order
     * listed (e.g. want_doctype_start before the more general want_dtd_tag,
     * text and the catch-all last) - confirm before reordering. */
    want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                              want_comment,
                              want_cdatasection,
                              want_xmlpi,
                              want_closing_tag,
                              want_doctype_start,
                              want_dtd_close,
                              want_dtd_tag,
                              want_tag,
                              tvbparse_not_chars(XML_CDATA, 1, 0, "<", NULL, NULL, after_token),
                              tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, unrecognized_token),
                              NULL);

    /* Reduced grammar for the heuristic: only constructs that start with '<';
     * closing tags, "]>", plain text and the catch-all are left out. */
    want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                   want_comment,
                                   want_cdatasection,
                                   want_xmlpi,
                                   want_doctype_start,
                                   want_dtd_tag,
                                   want_tag,
                                   NULL);

}
883
884
xml_new_namespace(wmem_map_t * hash,const gchar * name,...)885 static xml_ns_t *xml_new_namespace(wmem_map_t *hash, const gchar *name, ...)
886 {
887 xml_ns_t *ns = wmem_new(wmem_epan_scope(), xml_ns_t);
888 va_list ap;
889 gchar *attr_name;
890
891 ns->name = wmem_strdup(wmem_epan_scope(), name);
892 ns->hf_tag = -1;
893 ns->hf_cdata = -1;
894 ns->ett = -1;
895 ns->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
896 ns->elements = NULL;
897
898 va_start(ap, name);
899
900 while(( attr_name = va_arg(ap, gchar *) )) {
901 int *hfp = wmem_new(wmem_epan_scope(), int);
902 *hfp = -1;
903 wmem_map_insert(ns->attributes, wmem_strdup(wmem_epan_scope(), attr_name), hfp);
904 };
905
906 va_end(ap);
907
908 wmem_map_insert(hash, ns->name, ns);
909
910 return ns;
911 }
912
913
/*
 * Append one header-field registration record to `hfs`.
 *
 * The record describes an FT_STRING / BASE_NONE field with no value strings,
 * no bitmask and no blurb; `name` becomes its display name, `fqn` its
 * abbreviation and `p_id` is recorded as its id pointer.
 */
static void add_xml_field(wmem_array_t *hfs, int *p_id, const gchar *name, const gchar *fqn)
{
    hf_register_info field;

    field.p_id = p_id;

    field.hfinfo.abbrev  = fqn;
    field.hfinfo.name    = name;
    field.hfinfo.display = BASE_NONE;
    field.hfinfo.type    = FT_STRING;
    field.hfinfo.blurb   = NULL;
    field.hfinfo.bitmask = 0x0;
    field.hfinfo.strings = NULL;
    HFILL_INIT(field);

    wmem_array_append_one(hfs, field);
}
930
/*
 * wmem_map_foreach() callback over an attribute map.
 *
 * k: attribute name (gchar *)
 * v: the int that will hold the attribute's field id
 * p: struct _attr_reg_data with the target field array and the prefix
 *
 * Adds a field named after the attribute, abbreviated "<basename>.<attribute>".
 */
static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p)
{
    struct _attr_reg_data *reg = (struct _attr_reg_data *)p;
    gchar *attr_name = (gchar *)k;
    int *field_id = (int *)v;
    const gchar *abbrev;

    abbrev = wmem_strconcat(wmem_epan_scope(), reg->basename, ".", attr_name, NULL);

    add_xml_field(reg->hf, field_id, attr_name, abbrev);
}
938
939
/*
 * wmem_map_foreach() callback over the processing-instruction namespaces.
 *
 * v: the namespace (xml_ns_t *)
 * p: prefix string; the namespace's fields are named "<prefix>.<ns name>"
 *
 * Adds the namespace's tag field to hf_arr, its subtree index to ett_arr,
 * and then one field per attribute of the namespace.
 */
static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p)
{
    xml_ns_t *pi_ns = (xml_ns_t *)v;
    struct _attr_reg_data reg;
    gint *ett_slot;

    reg.hf       = hf_arr;
    reg.basename = wmem_strconcat(wmem_epan_scope(), (gchar *)p, ".", pi_ns->name, NULL);

    /* the tag field uses the same string as display name and abbreviation */
    add_xml_field(hf_arr, &(pi_ns->hf_tag), reg.basename, reg.basename);

    ett_slot = &(pi_ns->ett);
    g_array_append_val(ett_arr, ett_slot);

    wmem_map_foreach(pi_ns->attributes, add_xml_attribute_names, &reg);
}
957
destroy_dtd_data(dtd_build_data_t * dtd_data)958 static void destroy_dtd_data(dtd_build_data_t *dtd_data)
959 {
960 g_free(dtd_data->proto_name);
961 g_free(dtd_data->media_type);
962 g_free(dtd_data->description);
963 g_free(dtd_data->proto_root);
964
965 g_string_free(dtd_data->error, TRUE);
966
967 while(dtd_data->elements->len) {
968 dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->elements, 0);
969 g_ptr_array_free(nl->list, TRUE);
970 g_free(nl->name);
971 g_free(nl);
972 }
973
974 g_ptr_array_free(dtd_data->elements, TRUE);
975
976 while(dtd_data->attributes->len) {
977 dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->attributes, 0);
978 g_ptr_array_free(nl->list, TRUE);
979 g_free(nl->name);
980 g_free(nl);
981 }
982
983 g_ptr_array_free(dtd_data->attributes, TRUE);
984
985 g_free(dtd_data);
986 }
987
/*
 * wmem_map_foreach() callback used to clone an attribute map.
 *
 * k: attribute name (string); v: ignored; p: destination map.
 *
 * Inserts a copy of the name into the destination, mapped to a newly
 * allocated int set to -1.
 */
static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p)
{
    wmem_map_t *target = (wmem_map_t *)p;
    gchar *name_copy = (gchar *)wmem_strdup(wmem_epan_scope(), (const gchar *)k);
    int *fresh_id = wmem_new(wmem_epan_scope(), int);

    *fresh_id = -1;

    wmem_map_insert(target, name_copy, fresh_id);
}
998
/*
 * Build a new string-keyed map holding the same attribute names as `src`,
 * each mapped to its own int initialised to -1 (see copy_attrib_item()).
 * Returns the new map.
 */
static wmem_map_t *copy_attributes_hash(wmem_map_t *src)
{
    wmem_map_t *copy;

    copy = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    wmem_map_foreach(src, copy_attrib_item, copy);

    return copy;
}
1007
/*
 * Make a fresh element modelled on `orig`.
 *
 * The copy gets its own name string, unset (-1) tag/cdata/subtree ids, a
 * cloned attribute map, an empty child-element map and a new pointer array
 * listing the same child-name pointers as `orig` (the strings themselves are
 * shared, not duplicated).  The fqn member is not set here.
 */
static xml_ns_t *duplicate_element(xml_ns_t *orig)
{
    xml_ns_t *copy = wmem_new(wmem_epan_scope(), xml_ns_t);
    guint idx = 0;

    copy->name          = wmem_strdup(wmem_epan_scope(), orig->name);
    copy->hf_tag        = -1;
    copy->hf_cdata      = -1;
    copy->ett           = -1;
    copy->attributes    = copy_attributes_hash(orig->attributes);
    copy->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    copy->element_names = g_ptr_array_new();

    while (idx < orig->element_names->len) {
        g_ptr_array_add(copy->element_names,
                        g_ptr_array_index(orig->element_names, idx));
        idx++;
    }

    return copy;
}
1028
fully_qualified_name(GPtrArray * hier,gchar * name,gchar * proto_name)1029 static gchar *fully_qualified_name(GPtrArray *hier, gchar *name, gchar *proto_name)
1030 {
1031 guint i;
1032 wmem_strbuf_t *s = wmem_strbuf_new(wmem_epan_scope(), proto_name);
1033
1034 wmem_strbuf_append(s, ".");
1035
1036 for (i = 1; i < hier->len; i++) {
1037 wmem_strbuf_append_printf(s, "%s.", (gchar *)g_ptr_array_index(hier, i));
1038 }
1039
1040 wmem_strbuf_append(s, name);
1041
1042 return wmem_strbuf_finalize(s);;
1043 }
1044
1045
make_xml_hier(gchar * elem_name,xml_ns_t * root,wmem_map_t * elements,GPtrArray * hier,GString * error,wmem_array_t * hfs,GArray * etts,char * proto_name)1046 static xml_ns_t *make_xml_hier(gchar *elem_name,
1047 xml_ns_t *root,
1048 wmem_map_t *elements,
1049 GPtrArray *hier,
1050 GString *error,
1051 wmem_array_t *hfs,
1052 GArray *etts,
1053 char *proto_name)
1054 {
1055 xml_ns_t *fresh;
1056 xml_ns_t *orig;
1057 gchar *fqn;
1058 gint *ett_p;
1059 gboolean recurred = FALSE;
1060 guint i;
1061 struct _attr_reg_data d;
1062
1063 if ( g_str_equal(elem_name, root->name) ) {
1064 return NULL;
1065 }
1066
1067 if (! ( orig = (xml_ns_t *)wmem_map_lookup(elements, elem_name) )) {
1068 g_string_append_printf(error, "element '%s' is not defined\n", elem_name);
1069 return NULL;
1070 }
1071
1072 for (i = 0; i < hier->len; i++) {
1073 if( (elem_name) && (strcmp(elem_name, (gchar *) g_ptr_array_index(hier, i) ) == 0 )) {
1074 recurred = TRUE;
1075 }
1076 }
1077
1078 if (recurred) {
1079 return NULL;
1080 }
1081
1082 fqn = fully_qualified_name(hier, elem_name, proto_name);
1083
1084 fresh = duplicate_element(orig);
1085 fresh->fqn = fqn;
1086
1087 add_xml_field(hfs, &(fresh->hf_tag), wmem_strdup(wmem_epan_scope(), elem_name), fqn);
1088 add_xml_field(hfs, &(fresh->hf_cdata), wmem_strdup(wmem_epan_scope(), elem_name), fqn);
1089
1090 ett_p = &fresh->ett;
1091 g_array_append_val(etts, ett_p);
1092
1093 d.basename = fqn;
1094 d.hf = hfs;
1095
1096 wmem_map_foreach(fresh->attributes, add_xml_attribute_names, &d);
1097
1098 while(fresh->element_names->len) {
1099 gchar *child_name = (gchar *)g_ptr_array_remove_index(fresh->element_names, 0);
1100 xml_ns_t *child_element = NULL;
1101
1102 g_ptr_array_add(hier, elem_name);
1103 child_element = make_xml_hier(child_name, root, elements, hier, error, hfs, etts, proto_name);
1104 g_ptr_array_remove_index_fast(hier, hier->len - 1);
1105
1106 if (child_element) {
1107 wmem_map_insert(fresh->elements, child_element->name, child_element);
1108 }
1109 }
1110
1111 g_ptr_array_free(fresh->element_names, TRUE);
1112 fresh->element_names = NULL;
1113 return fresh;
1114 }
1115
/*
 * wmem_map_foreach() callback: v is an xml_ns_t whose element_names array
 * owns its strings.  Frees every string with g_free() and then the array.
 */
static void free_elements(gpointer k _U_, gpointer v, gpointer p _U_)
{
    GPtrArray *names = ((xml_ns_t *)v)->element_names;
    guint idx;

    for (idx = 0; idx < names->len; idx++) {
        g_free(g_ptr_array_index(names, idx));
    }

    g_ptr_array_free(names, TRUE);
}
1126
/*
 * Turn the result of the DTD parser into a namespace and hook it up.
 *
 * dtd_data: parser output; it is consumed and destroyed by this function
 * errors:   receives messages about duplicated or undefined elements
 *
 * Steps:
 *  1. build an xml_ns_t for each parsed element and attach the parsed
 *     attribute lists to them;
 *  2. pick the root name (the DTD's proto_root, else the first element);
 *  3. create the root element and populate it either recursively or as a
 *     flat list of all elements, depending on dtd_data->recursion;
 *  4. if the DTD names a protocol, register it as a protocol of its own with
 *     its own field and subtree arrays (and media type, if any); otherwise
 *     the fields and subtrees are added to the shared hf_arr / ett_arr;
 *  5. insert the root element into xml_ns.elements and free the temporary
 *     data.
 */
static void register_dtd(dtd_build_data_t *dtd_data, GString *errors)
{
    wmem_map_t *elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    gchar *root_name = NULL;
    xml_ns_t *root_element = NULL;
    wmem_array_t *hfs;
    GArray *etts;
    GPtrArray *hier;
    gchar *curr_name;
    GPtrArray *element_names = g_ptr_array_new();

    /* we first populate elements with those coming from the parser */
    while(dtd_data->elements->len) {
        dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->elements, 0);
        xml_ns_t *element = wmem_new(wmem_epan_scope(), xml_ns_t);

        /* we will use the first element found as root in case no other one was given. */
        if (root_name == NULL) {
            root_name = wmem_strdup(wmem_epan_scope(), nl->name);
        }

        element->name = wmem_strdup(wmem_epan_scope(), nl->name);
        element->element_names = nl->list;
        element->hf_tag = -1;
        element->hf_cdata = -1;
        element->ett = -1;
        element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
        element->elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);

        if( wmem_map_lookup(elements, element->name) ) {
            g_string_append_printf(errors, "element %s defined more than once\n", element->name);
            /* the duplicate is dropped, so its list of child names goes too */
            free_elements(NULL, element, NULL);
        } else {
            wmem_map_insert(elements, element->name, element);
            g_ptr_array_add(element_names, wmem_strdup(wmem_epan_scope(), element->name));
        }

        g_free(nl->name);
        g_free(nl);
    }

    /* then we add the attributes to their respective elements */
    while(dtd_data->attributes->len) {
        dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->attributes, 0);
        xml_ns_t *element = (xml_ns_t *)wmem_map_lookup(elements, nl->name);

        if (element) {
            while(nl->list->len) {
                gchar *name = (gchar *)g_ptr_array_remove_index(nl->list, 0);
                int *id_p = wmem_new(wmem_epan_scope(), int);

                *id_p = -1;
                wmem_map_insert(element->attributes, wmem_strdup(wmem_epan_scope(), name), id_p);
                g_free(name);
            }
        }
        else {
            g_string_append_printf(errors, "element %s is not defined\n", nl->name);

            /* nothing takes these attribute names over, so release them here */
            while(nl->list->len) {
                g_free(g_ptr_array_remove_index(nl->list, 0));
            }
        }

        g_free(nl->name);
        g_ptr_array_free(nl->list, TRUE);
        g_free(nl);
    }

    /* if a proto_root is defined in the dtd we'll use that as root */
    if( dtd_data->proto_root ) {
        wmem_free(wmem_epan_scope(), root_name);
        root_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_root);
    }

    /* we use a stack with the names to avoid recurring infinitely */
    hier = g_ptr_array_new();

    /*
     * if a proto name was given in the dtd the dtd will be used as a protocol
     * or else the dtd will be loaded as a branch of the xml namespace
     */
    if( ! dtd_data->proto_name ) {
        hfs = hf_arr;
        etts = ett_arr;
        g_ptr_array_add(hier, wmem_strdup(wmem_epan_scope(), "xml"));
    } else {
        /*
         * if we were given a proto_name the namespace will be registered
         * as an independent protocol with its own hf and ett arrays.
         */
        hfs = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
        etts = g_array_new(FALSE, FALSE, sizeof(gint *));
    }

    /* the root element of the dtd's namespace */
    root_element = wmem_new(wmem_epan_scope(), xml_ns_t);
    root_element->name = wmem_strdup(wmem_epan_scope(), root_name);
    root_element->fqn = dtd_data->proto_name ? wmem_strdup(wmem_epan_scope(), dtd_data->proto_name) : root_element->name;
    root_element->hf_tag = -1;
    root_element->hf_cdata = -1;
    root_element->ett = -1;
    root_element->elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    root_element->element_names = element_names;

    /*
     * we can either create a namespace as a flat namespace
     * in which all the elements are at the root level
     * or we can create a recursive namespace
     */
    if (dtd_data->recursion) {
        xml_ns_t *orig_root;

        /*
         * NOTE(review): root_element->name is a copy of root_name, so
         * make_xml_hier() returns NULL at its first check and this call
         * has no effect; kept as it was.
         */
        make_xml_hier(root_name, root_element, elements, hier, errors, hfs, etts, dtd_data->proto_name);

        /* the root contains itself, so that nested occurrences resolve to it */
        wmem_map_insert(root_element->elements, (gpointer)root_element->name, root_element);

        orig_root = (xml_ns_t *)wmem_map_lookup(elements, root_name);

        /* if the root element was defined copy its attrlist to the child */
        if(orig_root) {
            struct _attr_reg_data d;

            /* NOTE(review): proto_name may be NULL here when the DTD names no protocol - confirm */
            d.basename = dtd_data->proto_name;
            d.hf = hfs;

            root_element->attributes = copy_attributes_hash(orig_root->attributes);
            wmem_map_foreach(root_element->attributes, add_xml_attribute_names, &d);
        } else {
            root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
        }

        /* we then create all the sub hierarchies to catch the recurred cases */
        g_ptr_array_add(hier, root_name);

        while(root_element->element_names->len) {
            curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);

            if( ! wmem_map_lookup(root_element->elements, curr_name) ) {
                xml_ns_t *fresh = make_xml_hier(curr_name, root_element, elements, hier, errors,
                                                hfs, etts, dtd_data->proto_name);

                /*
                 * make_xml_hier() returns NULL when the name is already on
                 * the hierarchy stack (e.g. an element called "xml" while no
                 * proto_name was given); there is nothing to insert then.
                 */
                if (fresh) {
                    wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
                }
            }
        }

    } else {
        /* a flat namespace */
        g_ptr_array_add(hier, root_name);

        root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);

        while(root_element->element_names->len) {
            xml_ns_t *fresh;
            gint *ett_p;
            struct _attr_reg_data d;

            curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);
            fresh = duplicate_element((xml_ns_t *)wmem_map_lookup(elements, curr_name));
            fresh->fqn = fully_qualified_name(hier, curr_name, root_name);

            add_xml_field(hfs, &(fresh->hf_tag), curr_name, fresh->fqn);
            add_xml_field(hfs, &(fresh->hf_cdata), curr_name, fresh->fqn);

            d.basename = fresh->fqn;
            d.hf = hfs;

            wmem_map_foreach(fresh->attributes, add_xml_attribute_names, &d);

            ett_p = &fresh->ett;
            g_array_append_val(etts, ett_p);

            /* children are not expanded in a flat namespace */
            g_ptr_array_free(fresh->element_names, TRUE);

            wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
        }
    }

    g_ptr_array_free(element_names, TRUE);

    g_ptr_array_free(hier, TRUE);

    /*
     * if we were given a proto_name the namespace will be registered
     * as an independent protocol.
     */
    if( dtd_data->proto_name ) {
        gint *ett_p;
        gchar *full_name, *short_name;

        if (dtd_data->description) {
            full_name = wmem_strdup(wmem_epan_scope(), dtd_data->description);
        } else {
            full_name = wmem_strdup(wmem_epan_scope(), root_name);
        }
        short_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_name);

        ett_p = &root_element->ett;
        g_array_append_val(etts, ett_p);

        add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);

        root_element->hf_tag = proto_register_protocol(full_name, short_name, short_name);
        proto_register_field_array(root_element->hf_tag, (hf_register_info*)wmem_array_get_raw(hfs), wmem_array_get_count(hfs));
        proto_register_subtree_array((gint **)g_array_data(etts), etts->len);

        if (dtd_data->media_type) {
            gchar* media_type = wmem_strdup(wmem_epan_scope(), dtd_data->media_type);
            wmem_map_insert(media_types, media_type, root_element);
        }

        g_array_free(etts, TRUE);
    }

    wmem_map_insert(xml_ns.elements, root_element->name, root_element);

    /* release the child-name lists taken over from the parser */
    wmem_map_foreach(elements, free_elements, NULL);

    destroy_dtd_data(dtd_data);
    wmem_free(wmem_epan_scope(), root_name);
}
1340
/*
 * Aliases over the GLib directory API used by init_xml_names().
 *
 * OPENDIR_OP() passes a variable literally named `dummy` as the error
 * argument of g_dir_open(), so one must be in scope wherever it is used.
 * GETFNAME_OP() is an expression: it must not carry its own semicolon.
 */
# define DIRECTORY_T GDir
# define FILE_T gchar
# define OPENDIR_OP(name) g_dir_open(name, 0, dummy)
# define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
# define GETFNAME_OP(file) (file)
# define CLOSEDIR_OP(dir) g_dir_close(dir)
1347
init_xml_names(void)1348 static void init_xml_names(void)
1349 {
1350 guint i;
1351 DIRECTORY_T *dir;
1352 const FILE_T *file;
1353 const gchar *filename;
1354 gchar *dirname;
1355
1356 GError **dummy = wmem_new(wmem_epan_scope(), GError *);
1357 *dummy = NULL;
1358
1359 xmpli_names = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1360 media_types = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1361
1362 unknown_ns.elements = xml_ns.elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1363 unknown_ns.attributes = xml_ns.attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1364
1365 xml_new_namespace(xmpli_names, "xml", "version", "encoding", "standalone", NULL);
1366
1367 dirname = get_persconffile_path("dtds", FALSE);
1368
1369 if (test_for_directory(dirname) != EISDIR) {
1370 /* Although dir isn't a directory it may still use memory */
1371 g_free(dirname);
1372 dirname = get_datafile_path("dtds");
1373 }
1374
1375 if (test_for_directory(dirname) == EISDIR) {
1376 if ((dir = OPENDIR_OP(dirname)) != NULL) {
1377 GString *errors = g_string_new("");
1378
1379 while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1380 guint namelen;
1381 filename = GETFNAME_OP(file);
1382
1383 namelen = (int)strlen(filename);
1384 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4), ".dtd") == 0 ) ) {
1385 GString *preparsed;
1386 dtd_build_data_t *dtd_data;
1387
1388 g_string_truncate(errors, 0);
1389 preparsed = dtd_preparse(dirname, filename, errors);
1390
1391 if (errors->len) {
1392 report_failure("Dtd Preparser in file %s%c%s: %s",
1393 dirname, G_DIR_SEPARATOR, filename, errors->str);
1394 continue;
1395 }
1396
1397 dtd_data = dtd_parse(preparsed);
1398
1399 g_string_free(preparsed, TRUE);
1400
1401 if (dtd_data->error->len) {
1402 report_failure("Dtd Parser in file %s%c%s: %s",
1403 dirname, G_DIR_SEPARATOR, filename, dtd_data->error->str);
1404 destroy_dtd_data(dtd_data);
1405 continue;
1406 }
1407
1408 register_dtd(dtd_data, errors);
1409
1410 if (errors->len) {
1411 report_failure("Dtd Registration in file: %s%c%s: %s",
1412 dirname, G_DIR_SEPARATOR, filename, errors->str);
1413 continue;
1414 }
1415 }
1416 }
1417 g_string_free(errors, TRUE);
1418
1419 CLOSEDIR_OP(dir);
1420 }
1421 }
1422
1423 g_free(dirname);
1424
1425 for(i=0;i<array_length(default_media_types);i++) {
1426 if( ! wmem_map_lookup(media_types, default_media_types[i]) ) {
1427 wmem_map_insert(media_types, (gpointer)default_media_types[i], &xml_ns);
1428 }
1429 }
1430
1431 wmem_map_foreach(xmpli_names, add_xmlpi_namespace, (gpointer)"xml.xmlpi");
1432
1433 wmem_free(wmem_epan_scope(), dummy);
1434 }
1435
/*
 * Register the XML protocol: its fields, subtrees, expert infos,
 * preferences and the "xml" dissector handle.
 *
 * The order of the calls matters: hf_arr and ett_arr are created and seeded
 * with the static base tables, init_xml_names() then runs (it and the code
 * it calls append further entries to both arrays), and only afterwards are
 * the arrays handed to the registration functions.
 */
void
proto_register_xml(void)
{
    /* subtree indices that exist regardless of any loaded DTD */
    static gint *ett_base[] = {
        &unknown_ns.ett,
        &xml_ns.ett,
        &ett_dtd,
        &ett_xmpli
    };

    /* fields that exist regardless of any loaded DTD */
    static hf_register_info hf_base[] = {
        { &hf_xmlpi,
          {"XMLPI", "xml.xmlpi",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &hf_cdatasection,
          {"CDATASection", "xml.cdatasection",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &hf_comment,
          {"Comment", "xml.comment",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &hf_unknowwn_attrib,
          {"Attribute", "xml.attribute",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &hf_doctype,
          {"Doctype", "xml.doctype",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &hf_dtd_tag,
          {"DTD Tag", "xml.dtdtag",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &unknown_ns.hf_cdata,
          {"CDATA", "xml.cdata",
           FT_STRING, BASE_NONE, NULL, 0, NULL,
           HFILL }
        },
        { &unknown_ns.hf_tag,
          {"Tag", "xml.tag",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        },
        { &xml_ns.hf_cdata,
          {"Unknown", "xml.unknown",
           FT_STRING, BASE_NONE, NULL, 0,
           NULL, HFILL }
        }
    };

    static ei_register_info ei[] = {
        { &ei_xml_closing_unopened_tag, { "xml.closing_unopened_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened tag", EXPFILL }},
        { &ei_xml_closing_unopened_xmpli_tag, { "xml.closing_unopened_xmpli_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened xmpli tag", EXPFILL }},
        { &ei_xml_unrecognized_text, { "xml.unrecognized_text", PI_PROTOCOL, PI_WARN, "Unrecognized text", EXPFILL }},
    };

    module_t *xml_module;
    expert_module_t* expert_xml;

    /* growable arrays, seeded with the static tables above */
    hf_arr = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
    ett_arr = g_array_new(FALSE, FALSE, sizeof(gint *));

    wmem_array_append(hf_arr, hf_base, array_length(hf_base));
    g_array_append_vals(ett_arr, ett_base, array_length(ett_base));

    /* must run before the arrays are registered below */
    init_xml_names();

    /* the protocol id is kept in the root namespace's hf_tag */
    xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);

    proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)wmem_array_get_raw(hf_arr), wmem_array_get_count(hf_arr));
    proto_register_subtree_array((gint **)g_array_data(ett_arr), ett_arr->len);
    expert_xml = expert_register_protocol(xml_ns.hf_tag);
    expert_register_field_array(expert_xml, ei, array_length(ei));

    xml_module = prefs_register_protocol(xml_ns.hf_tag, NULL);
    prefs_register_obsolete_preference(xml_module, "heuristic");
    prefs_register_obsolete_preference(xml_module, "heuristic_tcp");
    prefs_register_obsolete_preference(xml_module, "heuristic_udp");
    /* XXX - UCS-2, or UTF-16? */
    prefs_register_bool_preference(xml_module, "heuristic_unicode", "Use Unicode in heuristics",
                                   "Try to recognize XML encoded in Unicode (UCS-2BE)",
                                   &pref_heuristic_unicode);

    /* the subtree array is released once it has been registered */
    g_array_free(ett_arr, TRUE);

    xml_handle = register_dissector("xml", dissect_xml, xml_ns.hf_tag);

    init_xml_parser();
}
1533
1534 static void
add_dissector_media(gpointer k,gpointer v _U_,gpointer p _U_)1535 add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_)
1536 {
1537 dissector_add_string("media_type", (gchar *)k, xml_handle);
1538 }
1539
1540 void
proto_reg_handoff_xml(void)1541 proto_reg_handoff_xml(void)
1542 {
1543 wmem_map_foreach(media_types, add_dissector_media, NULL);
1544 dissector_add_uint_range_with_preference("tcp.port", "", xml_handle);
1545
1546 heur_dissector_add("http", dissect_xml_heur, "XML in HTTP", "xml_http", xml_ns.hf_tag, HEURISTIC_DISABLE);
1547 heur_dissector_add("sip", dissect_xml_heur, "XML in SIP", "xml_sip", xml_ns.hf_tag, HEURISTIC_DISABLE);
1548 heur_dissector_add("media", dissect_xml_heur, "XML in media", "xml_media", xml_ns.hf_tag, HEURISTIC_DISABLE);
1549 heur_dissector_add("tcp", dissect_xml_heur, "XML over TCP", "xml_tcp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1550 heur_dissector_add("udp", dissect_xml_heur, "XML over UDP", "xml_udp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1551
1552 heur_dissector_add("wtap_file", dissect_xml_heur, "XML file", "xml_wtap", xml_ns.hf_tag, HEURISTIC_ENABLE);
1553
1554 dissector_add_uint("acdr.tls_application", TLS_APP_XML, xml_handle);
1555 }
1556
1557 /*
1558 * Editor modelines - https://www.wireshark.org/tools/modelines.html
1559 *
1560 * Local variables:
1561 * c-basic-offset: 4
1562 * tab-width: 8
1563 * indent-tabs-mode: nil
1564 * End:
1565 *
1566 * vi: set shiftwidth=4 tabstop=8 expandtab:
1567 * :indentSize=4:tabSize=8:noTabs=true:
1568 */
1569