1 /* packet-xml.c
 * Wireshark's XML dissector.
3  *
4  * (C) 2005, Luis E. Garcia Ontanon.
5  *
6  * Refer to the AUTHORS file or the AUTHORS section in the man page
7  * for contacting the author(s) of this file.
8  *
9  * Wireshark - Network traffic analyzer
10  * By Gerald Combs <gerald@wireshark.org>
11  * Copyright 1998 Gerald Combs
12  *
13  * SPDX-License-Identifier: GPL-2.0-or-later
14  */
15 
16 #include "config.h"
17 
18 #include <string.h>
19 #include <errno.h>
20 
21 #include <epan/packet.h>
22 #include <epan/tvbparse.h>
23 #include <epan/dtd.h>
24 #include <epan/proto_data.h>
25 #include <wsutil/filesystem.h>
26 #include <epan/prefs.h>
27 #include <epan/expert.h>
28 #include <epan/garrayfix.h>
29 #include <wsutil/str_util.h>
30 #include <wsutil/report_message.h>
31 
32 #include "packet-xml.h"
33 #include "packet-acdr.h"
34 
35 void proto_register_xml(void);
36 void proto_reg_handoff_xml(void);
37 
/*
 * Pairs a header-field array with a base name.
 * NOTE(review): the code that fills and consumes this struct is outside the
 * part of the file reviewed here; it presumably carries state while attribute
 * fields are being registered - confirm against the registration code.
 */
struct _attr_reg_data {
    wmem_array_t *hf;       /* array of header-field entries (assumed destination for new entries - confirm) */
    const gchar *basename;  /* base name string (assumed prefix for generated field names - confirm) */
};
42 
43 
/* Subtree indexes: ett_dtd is used for DOCTYPE frames, ett_xmpli for
 * processing instructions whose name has no registered namespace. */
static gint ett_dtd = -1;
static gint ett_xmpli = -1;

/* Header-field ids.  hf_unknowwn_attrib (sic) is used for attributes that are
 * not known to the namespace of the enclosing element. */
static int hf_unknowwn_attrib = -1;
static int hf_comment = -1;
static int hf_xmlpi = -1;
static int hf_dtd_tag = -1;
static int hf_doctype = -1;
static int hf_cdatasection = -1;

/* Expert infos raised by the parser callbacks: a closing construct arrived
 * while only the root frame was on the stack, or a token was not recognized. */
static expert_field ei_xml_closing_unopened_tag = EI_INIT;
static expert_field ei_xml_closing_unopened_xmpli_tag = EI_INIT;
static expert_field ei_xml_unrecognized_text = EI_INIT;

/* dissector handles */
static dissector_handle_t xml_handle;

/* parser definitions */
/* want: grammar consumed by dissect_xml(); want_ignore: handed to
 * tvbparse_init() as the "ignore" pattern; want_heur: pattern peeked at by the
 * heuristic dissector. */
static tvbparse_wanted_t *want;
static tvbparse_wanted_t *want_ignore;
static tvbparse_wanted_t *want_heur;

/* Lookup tables returning xml_ns_t entries: xmpli_names is keyed by
 * processing-instruction name, media_types by pinfo->match_string. */
static wmem_map_t *xmpli_names;
static wmem_map_t *media_types;

/* xml_ns is the default root namespace, unknown_ns the fallback for tags that
 * no namespace recognizes, and root_ns the root namespace selected by
 * dissect_xml() for the packet currently being dissected. */
static xml_ns_t xml_ns     = {"xml",     "/", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t unknown_ns = {"unknown", "?", -1, -1, -1, NULL, NULL, NULL};
static xml_ns_t *root_ns;

/* When TRUE, the heuristic dissector retries on a UTF-16/UCS-2 decoded copy of
 * the payload if the raw bytes do not look like XML. */
static gboolean pref_heuristic_unicode    = FALSE;


/* Parser token ids.  They are negative, so after_token() - which treats any
 * id greater than zero as a header-field id - never mistakes them for one. */
#define XML_CDATA       -1000
#define XML_SCOPED_NAME -1001


/* NOTE(review): not referenced in the part of the file reviewed here;
 * presumably the accumulated hf/ett registration arrays - confirm. */
static wmem_array_t *hf_arr;
static GArray *ett_arr;
82 
/*
 * Media (MIME) types treated as XML by default.
 * NOTE(review): the code that consumes this table is outside the part of the
 * file reviewed here; presumably each entry is registered against the XML
 * dissector handle - confirm in the handoff routine.
 */
static const gchar *default_media_types[] = {
    "text/xml",
    "text/vnd.wap.wml",
    "text/vnd.wap.si",
    "text/vnd.wap.sl",
    "text/vnd.wap.co",
    "text/vnd.wap.emn",
    "application/3gpp-ims+xml",
    "application/atom+xml",
    "application/auth-policy+xml",
    "application/ccmp+xml",
    "application/conference-info+xml",          /*RFC4575*/
    "application/cpim-pidf+xml",
    "application/cpl+xml",
    "application/dds-web+xml",
    "application/im-iscomposing+xml",           /*RFC3994*/
    "application/load-control+xml",             /*RFC7200*/
    "application/mathml+xml",
    "application/media_control+xml",
    "application/note+xml",
    "application/pidf+xml",
    "application/pidf-diff+xml",
    "application/poc-settings+xml",
    "application/rdf+xml",
    "application/reginfo+xml",
    "application/resource-lists+xml",
    "application/rlmi+xml",
    "application/rls-services+xml",
    "application/rss+xml",
    "application/rs-metadata+xml",
    "application/smil",
    "application/simple-filter+xml",
    "application/simple-message-summary+xml",   /*RFC3842*/
    "application/simservs+xml",
    "application/soap+xml",
    "application/vnd.etsi.aoc+xml",
    "application/vnd.etsi.cug+xml",
    "application/vnd.etsi.iptvcommand+xml",
    "application/vnd.etsi.iptvdiscovery+xml",
    "application/vnd.etsi.iptvprofile+xml",
    "application/vnd.etsi.iptvsad-bc+xml",
    "application/vnd.etsi.iptvsad-cod+xml",
    "application/vnd.etsi.iptvsad-npvr+xml",
    "application/vnd.etsi.iptvservice+xml",
    "application/vnd.etsi.iptvsync+xml",
    "application/vnd.etsi.iptvueprofile+xml",
    "application/vnd.etsi.mcid+xml",
    "application/vnd.etsi.overload-control-policy-dataset+xml",
    "application/vnd.etsi.pstn+xml",
    "application/vnd.etsi.sci+xml",
    "application/vnd.etsi.simservs+xml",
    "application/vnd.etsi.tsl+xml",
    "application/vnd.oma.xdm-apd+xml",
    "application/vnd.oma.fnl+xml",
    "application/vnd.oma.access-permissions-list+xml",
    "application/vnd.oma.alias-principals-list+xml",
    "application/upp-directory+xml",            /*OMA-ERELD-XDM-V2_2_1-20170124-A*/
    "application/vnd.oma.xdm-hi+xml",
    "application/vnd.oma.xdm-rhi+xml",
    "application/vnd.oma.xdm-prefs+xml",
    "application/vnd.oma.xdcp+xml",
    "application/vnd.oma.bcast.associated-procedure-parameter+xml",
    "application/vnd.oma.bcast.drm-trigger+xml",
    "application/vnd.oma.bcast.imd+xml",
    "application/vnd.oma.bcast.notification+xml",
    "application/vnd.oma.bcast.sgdd+xml",
    "application/vnd.oma.bcast.smartcard-trigger+xml",
    "application/vnd.oma.bcast.sprov+xml",
    "application/vnd.oma.cab-address-book+xml",
    "application/vnd.oma.cab-feature-handler+xml",
    "application/vnd.oma.cab-pcc+xml",
    "application/vnd.oma.cab-subs-invite+xml",
    "application/vnd.oma.cab-user-prefs+xml",
    "application/vnd.oma.dd2+xml",
    "application/vnd.oma.drm.risd+xml",
    "application/vnd.oma.group-usage-list+xml",
    "application/vnd.oma.pal+xml",
    "application/vnd.oma.poc.detailed-progress-report+xml",
    "application/vnd.oma.poc.final-report+xml",
    "application/vnd.oma.poc.groups+xml",
    "application/vnd.oma.poc.invocation-descriptor+xml",
    "application/vnd.oma.poc.optimized-progress-report+xml",
    "application/vnd.oma.scidm.messages+xml",
    "application/vnd.oma.suppnot+xml",          /*OMA-ERELD-Presence_SIMPLE-V2_0-20120710-A*/
    "application/vnd.oma.xcap-directory+xml",
    "application/vnd.omads-email+xml",
    "application/vnd.omads-file+xml",
    "application/vnd.omads-folder+xml",
    "application/vnd.3gpp.access-transfer-events+xml",
    "application/vnd.3gpp.bsf+xml",
    "application/vnd.3gpp.comm-div-info+xml",   /*3GPP TS 24.504  version 8.19.0*/
    "application/vnd.3gpp.cw+xml",
    "application/vnd.3gpp.iut+xml",             /*3GPP TS 24.337*/
    /* NOTE(review): "vnc" below looks like a typo for the "vnd" vendor tree
     * used by every neighbouring entry - confirm against 3GPP TS 24.337
     * before changing the string. */
    "application/vnc.3gpp.iut-config+xml",      /*3GPP TS 24.337*/
    "application/vnd.3gpp.mcptt-info+xml",
    "application/vnd.3gpp.mid-call+xml",
    "application/vnd.3gpp-prose-pc3ch+xml",
    "application/vnd.3gpp-prose+xml",
    "application/vnd.3gpp.replication+xml",     /*3GPP TS 24.337*/
    "application/vnd.3gpp.sms+xml",
    "application/vnd.3gpp.srvcc-info+xml",
    "application/vnd.3gpp.srvcc-ext+xml",
    "application/vnd.3gpp.state-and-event-info+xml",
    "application/vnd.3gpp.ussd+xml",
    "application/vnd.3gpp2.bcmcsinfo+xml",
    "application/vnd.wv.csp+xml",
    "application/vnd.wv.csp.xml",
    "application/watcherinfo+xml",
    "application/xcap-att+xml",
    "application/xcap-caps+xml",
    "application/xcap-diff+xml",
    "application/xcap-el+xml",
    "application/xcap-error+xml",
    "application/xcap-ns+xml",
    "application/xml",
    "application/xml-dtd",
    "application/xpidf+xml",
    "application/xslt+xml",
    "application/x-crd+xml",
    "application/x-wms-logconnectstats",
    "application/x-wms-logplaystats",
    "application/x-wms-sendevent",
    "image/svg+xml",
    "message/imdn+xml",                         /*RFC5438*/
};
208 
/*
 * Attach new_child to the frame tree as the last child of parent.
 *
 * All link fields of new_child are reset first, so whatever they held before
 * is discarded.  A NULL parent means new_child is the root frame: it is left
 * unlinked apart from its (NULL) parent pointer.
 */
static void insert_xml_frame(xml_frame_t *parent, xml_frame_t *new_child)
{
    /* A freshly inserted frame has no children and no siblings yet. */
    new_child->parent       = parent;
    new_child->first_child  = NULL;
    new_child->last_child   = NULL;
    new_child->prev_sibling = NULL;
    new_child->next_sibling = NULL;

    if (parent == NULL) {
        /* root frame: nothing to link into */
        return;
    }

    if (parent->first_child != NULL) {
        /* append after the current last child */
        xml_frame_t *tail = parent->last_child;

        tail->next_sibling      = new_child;
        new_child->prev_sibling = tail;
    } else {
        /* parent had no children so far */
        parent->first_child = new_child;
    }

    parent->last_child = new_child;
}
227 
228 static int
dissect_xml(tvbuff_t * tvb,packet_info * pinfo,proto_tree * tree,void * data _U_)229 dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data _U_)
230 {
231     tvbparse_t       *tt;
232     static GPtrArray *stack;
233     xml_frame_t      *current_frame;
234     const char       *colinfo_str;
235     tvbuff_t         *decoded;
236     guint16           try_bom;
237 
238     if (stack != NULL)
239         g_ptr_array_free(stack, TRUE);
240 
241     stack = g_ptr_array_new();
242     current_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
243     current_frame->type           = XML_FRAME_ROOT;
244     current_frame->name           = NULL;
245     current_frame->name_orig_case = NULL;
246     current_frame->value          = NULL;
247     current_frame->pinfo          = pinfo;
248     insert_xml_frame(NULL, current_frame);
249     g_ptr_array_add(stack, current_frame);
250 
251     /* Detect and act on possible byte-order mark (BOM) */
252     try_bom = tvb_get_ntohs(tvb, 0);
253     if (try_bom == 0xFEFF) {
254         /* UTF-16BE */
255         const guint8 *data_str = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), ENC_UTF_16|ENC_BIG_ENDIAN);
256         size_t l = strlen(data_str);
257         decoded = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
258         add_new_data_source(pinfo, decoded, "Decoded UTF-16BE text");
259     }
260     else if(try_bom == 0xFFFE) {
261         /* UTF-16LE (or possibly UTF-32LE, but Wireshark doesn't support UTF-32) */
262         const guint8 *data_str = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), ENC_UTF_16|ENC_LITTLE_ENDIAN);
263         size_t l = strlen(data_str);
264         decoded = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
265         add_new_data_source(pinfo, decoded, "Decoded UTF-16LE text");
266     }
267     /* Could also test if try_bom is 0xnn00 or 0x00nn to guess endianness if we wanted */
268     else {
269         /* Assume it's UTF-8, either with or without BOM */
270         decoded = tvb;
271     }
272 
273     tt = tvbparse_init(pinfo->pool, decoded, 0, -1, stack, want_ignore);
274     current_frame->start_offset = 0;
275     current_frame->length = tvb_captured_length(decoded);
276 
277     root_ns = NULL;
278 
279     if (pinfo->match_string)
280         root_ns = (xml_ns_t *)wmem_map_lookup(media_types, pinfo->match_string);
281 
282     if (! root_ns ) {
283         root_ns = &xml_ns;
284         colinfo_str = "/XML";
285     } else {
286         char *colinfo_str_buf;
287         colinfo_str_buf = wmem_strconcat(wmem_packet_scope(), "/", root_ns->name, NULL);
288         ascii_strup_inplace(colinfo_str_buf);
289         colinfo_str = colinfo_str_buf;
290     }
291 
292     col_append_str(pinfo->cinfo, COL_PROTOCOL, colinfo_str);
293 
294     current_frame->ns = root_ns;
295 
296     current_frame->item = proto_tree_add_item(tree, current_frame->ns->hf_tag, decoded, 0, -1, ENC_UTF_8|ENC_NA);
297     current_frame->tree = proto_item_add_subtree(current_frame->item, current_frame->ns->ett);
298     current_frame->last_item = current_frame->item;
299 
300     while(tvbparse_get(tt, want)) ;
301 
302     /* Save XML structure in case it is useful for the caller (only XMPP for now) */
303     p_add_proto_data(pinfo->pool, pinfo, xml_ns.hf_tag, 0, current_frame);
304 
305     return tvb_captured_length(tvb);
306 }
307 
dissect_xml_heur(tvbuff_t * tvb,packet_info * pinfo,proto_tree * tree,void * data)308 static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
309 {
310     if (tvbparse_peek(tvbparse_init(pinfo->pool, tvb, 0, -1, NULL, want_ignore), want_heur)) {
311         dissect_xml(tvb, pinfo, tree, data);
312         return TRUE;
313     } else if (pref_heuristic_unicode) {
314         const guint8 *data_str;
315         tvbuff_t     *unicode_tvb;
316         guint16       try_bom;
317         /* XXX - UCS-2, or UTF-16? */
318         gint          enc = ENC_UCS_2|ENC_LITTLE_ENDIAN;
319         size_t        l;
320 
321         try_bom = tvb_get_ntohs(tvb, 0);
322         if (try_bom == 0xFEFF) {
323             enc = ENC_UTF_16|ENC_BIG_ENDIAN;
324         }
325         else if(try_bom == 0xFFFE) {
326             enc = ENC_UTF_16|ENC_LITTLE_ENDIAN;
327         }
328 
329         data_str    = tvb_get_string_enc(pinfo->pool, tvb, 0, tvb_captured_length(tvb), enc);
330         l           = strlen(data_str);
331         unicode_tvb = tvb_new_child_real_data(tvb, data_str, (guint)l, (gint)l);
332         if (tvbparse_peek(tvbparse_init(pinfo->pool, unicode_tvb, 0, -1, NULL, want_ignore), want_heur)) {
333             add_new_data_source(pinfo, unicode_tvb, "UTF8");
334             dissect_xml(unicode_tvb, pinfo, tree, data);
335             return TRUE;
336         }
337     }
338     return FALSE;
339 }
340 
/*
 * Find a child element (XML_FRAME_TAG) of frame.
 *
 * With a NULL name the first child element is returned; otherwise the first
 * child element whose original-case name equals name (case-sensitive).
 * Returns NULL when no child matches.
 */
xml_frame_t *xml_get_tag(xml_frame_t *frame, const gchar *name)
{
    xml_frame_t *child;

    for (child = frame->first_child; child != NULL; child = child->next_sibling) {
        if (child->type != XML_FRAME_TAG)
            continue;

        if (name == NULL)
            return child;  /* any tag will do: take the first one */

        if (child->name_orig_case != NULL && strcmp(child->name_orig_case, name) == 0)
            return child;
    }

    return NULL;
}
361 
/*
 * Find the first attribute child (XML_FRAME_ATTRIB) of frame whose
 * original-case name equals name (case-sensitive).
 *
 * name is passed straight to strcmp(), so it must not be NULL.
 * Returns NULL when frame has no such attribute.
 */
xml_frame_t *xml_get_attrib(xml_frame_t *frame, const gchar *name)
{
    xml_frame_t *child;

    for (child = frame->first_child; child != NULL; child = child->next_sibling) {
        if (child->type != XML_FRAME_ATTRIB)
            continue;
        if (child->name_orig_case == NULL)
            continue;
        if (strcmp(child->name_orig_case, name) == 0)
            return child;
    }

    return NULL;
}
378 
/*
 * Return the first character-data child (XML_FRAME_CDATA) of frame,
 * or NULL when frame has none.
 */
xml_frame_t *xml_get_cdata(xml_frame_t *frame)
{
    xml_frame_t *child = frame->first_child;

    while (child != NULL && child->type != XML_FRAME_CDATA)
        child = child->next_sibling;

    return child;
}
394 
/*
 * tvbparse callback for leaf tokens (character data, comments, ...).
 *
 * Adds an item for the token to the current frame's tree, labelled with the
 * token's text.  The header field is chosen from the token id:
 *   - XML_CDATA: the cdata field of the current namespace (or of xml_ns when
 *     the current frame has no namespace);
 *   - a positive id: the id itself is the header-field id;
 *   - anything else: the cdata field of xml_ns.
 * For XML_CDATA tokens an XML_FRAME_CDATA frame is also appended to the
 * current frame's children.
 */
static void after_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray      *frames   = (GPtrArray *)tvbparse_data;
    xml_frame_t    *top      = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    const gboolean  is_cdata = (tok->id == XML_CDATA);
    int             hfid     = xml_ns.hf_cdata;
    proto_item     *pi;
    xml_frame_t    *cdata_frame;

    if (is_cdata) {
        if (top->ns)
            hfid = top->ns->hf_cdata;
    } else if (tok->id > 0) {
        hfid = tok->id;
    }

    pi = proto_tree_add_item(top->tree, hfid, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(pi, "%s",
                        tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    if (!is_cdata)
        return;

    /* Record the character data in the frame tree as well. */
    cdata_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    cdata_frame->type           = XML_FRAME_CDATA;
    cdata_frame->name           = NULL;
    cdata_frame->name_orig_case = NULL;
    cdata_frame->value          = tvb_new_subset_length(tok->tvb, tok->offset, tok->len);
    insert_xml_frame(top, cdata_frame);
    cdata_frame->pinfo          = top->pinfo;
    cdata_frame->ns             = NULL;
    cdata_frame->tree           = NULL;
    cdata_frame->item           = pi;
    cdata_frame->last_item      = pi;
    cdata_frame->start_offset   = tok->offset;
    cdata_frame->length         = tok->len;
}
434 
/*
 * tvbparse callback for the start of a processing instruction.
 *
 * Adds an item covering the token (labelled with the text up to and including
 * the instruction name), opens a subtree for it, and pushes a new
 * XML_FRAME_XMPLI frame on the stack.  The namespace registered for the
 * instruction name in xmpli_names supplies the field and subtree ids;
 * hf_xmlpi/ett_xmpli are used when there is none.
 */
static void before_xmpli(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames   = (GPtrArray *)tvbparse_data;
    xml_frame_t     *top      = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    tvbparse_elem_t *name_tok = tok->sub->next;
    gchar           *name;
    xml_ns_t        *ns;
    int              hf_tag;
    gint             ett;
    proto_item      *pi;
    proto_tree      *pt;
    xml_frame_t     *pi_frame;

    name = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);

    /*
     * NOTE(review): the lookup uses the name as it appears on the wire; the
     * name is lower-cased only afterwards, and that lower-cased string is
     * stored as both name and name_orig_case.  Kept as is - confirm whether
     * this is intended.
     */
    ns = (xml_ns_t *)wmem_map_lookup(xmpli_names, name);
    ascii_strdown_inplace(name);

    hf_tag = ns ? ns->hf_tag : hf_xmlpi;
    ett    = ns ? ns->ett    : ett_xmpli;

    pi = proto_tree_add_item(top->tree, hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(pi, "%s",
                        tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset,
                                        (name_tok->offset - tok->offset) + name_tok->len));
    pt = proto_item_add_subtree(pi, ett);

    pi_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    pi_frame->type           = XML_FRAME_XMPLI;
    pi_frame->name           = name;
    pi_frame->name_orig_case = name;
    pi_frame->value          = NULL;
    insert_xml_frame(top, pi_frame);
    pi_frame->pinfo          = top->pinfo;
    pi_frame->ns             = ns;
    pi_frame->tree           = pt;
    pi_frame->item           = pi;
    pi_frame->last_item      = pi;
    pi_frame->start_offset   = tok->offset;
    pi_frame->length         = tok->len;

    g_ptr_array_add(frames, pi_frame);
}
481 
/*
 * tvbparse callback for the "?>" that ends a processing instruction.
 *
 * Adds the terminator text to the current frame's tree and pops that frame.
 * If only the root frame is on the stack there is nothing to close, so an
 * expert info is added instead.
 */
static void after_xmlpi(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        /* never pop the root frame */
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_xmpli_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, frames->len - 1);
}
496 
/*
 * tvbparse callback for the start of an element ("<name" or "<prefix:name").
 *
 * Resolves the element's namespace, adds an item for it (labelled with the
 * text up to and including the name), opens a subtree and pushes a new
 * XML_FRAME_TAG frame on the stack.
 *
 * Namespace resolution:
 *   - scoped name: the prefix is looked up in xml_ns.elements and the local
 *     name in the elements of the namespace found; names keep their case;
 *   - plain name: the lower-cased name is looked up in the elements of the
 *     current frame's namespace, then in those of root_ns; the original
 *     spelling is kept in name_orig_case.
 * unknown_ns is used whenever the lookup yields nothing.
 */
static void before_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames         = (GPtrArray *)tvbparse_data;
    xml_frame_t     *top            = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    tvbparse_elem_t *name_tok       = tok->sub->next;
    gchar           *name;
    gchar           *name_orig_case;
    xml_ns_t        *ns             = NULL;
    xml_frame_t     *tag_frame;
    proto_item      *pi;
    proto_tree      *pt;

    if (name_tok->sub->id == XML_SCOPED_NAME) {
        /* sequence is: prefix, ':', local name */
        tvbparse_elem_t *prefix_tok = name_tok->sub->sub;
        tvbparse_elem_t *local_tok  = prefix_tok->next->next;
        gchar           *prefix;
        xml_ns_t        *prefix_ns;

        prefix         = (gchar *)tvb_get_string_enc(wmem_packet_scope(), prefix_tok->tvb, prefix_tok->offset, prefix_tok->len, ENC_ASCII);
        name           = (gchar *)tvb_get_string_enc(wmem_packet_scope(), local_tok->tvb, local_tok->offset, local_tok->len, ENC_ASCII);
        name_orig_case = name;

        prefix_ns = (xml_ns_t *)wmem_map_lookup(xml_ns.elements, prefix);
        if (prefix_ns)
            ns = (xml_ns_t *)wmem_map_lookup(prefix_ns->elements, name);
    } else {
        name           = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
        name_orig_case = wmem_strdup(wmem_packet_scope(), name);
        ascii_strdown_inplace(name);

        if (top->ns) {
            ns = (xml_ns_t *)wmem_map_lookup(top->ns->elements, name);
            if (!ns)
                ns = (xml_ns_t *)wmem_map_lookup(root_ns->elements, name);
        }
    }

    if (!ns)
        ns = &unknown_ns;

    pi = proto_tree_add_item(top->tree, ns->hf_tag, tok->tvb, tok->offset, tok->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(pi, "%s",
                        tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset,
                                        (name_tok->offset - tok->offset) + name_tok->len));
    pt = proto_item_add_subtree(pi, ns->ett);

    tag_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    tag_frame->type           = XML_FRAME_TAG;
    tag_frame->name           = name;
    tag_frame->name_orig_case = name_orig_case;
    tag_frame->value          = NULL;
    insert_xml_frame(top, tag_frame);
    tag_frame->pinfo          = top->pinfo;
    tag_frame->ns             = ns;
    tag_frame->tree           = pt;
    tag_frame->item           = pi;
    tag_frame->last_item      = pi;
    tag_frame->start_offset   = tok->offset;
    tag_frame->length         = tok->len;

    g_ptr_array_add(frames, tag_frame);
}
571 
/*
 * tvbparse callback for the ">" that ends an opening tag: appends ">" to the
 * label of the last item recorded for the current frame.  The frame stays on
 * the stack.
 */
static void
after_open_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok _U_)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);

    proto_item_append_text(top->last_item, ">");
}
579 
/*
 * tvbparse callback for the "/>" that ends a self-closing tag.
 *
 * Appends "/>" to the label of the last item recorded for the current frame
 * and pops that frame.  If only the root frame is on the stack, an expert
 * info is added instead of popping.
 */
static void after_closed_tag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);

    proto_item_append_text(top->last_item, "/>");

    if (frames->len <= 1) {
        /* never pop the root frame */
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, frames->len - 1);
}
594 
/*
 * tvbparse callback for a closing tag.
 *
 * Stretches the current frame (and its item) so that it ends with this
 * token, adds the token text to the frame's tree and pops the frame.  If only
 * the root frame is on the stack, an expert info is added instead of popping.
 */
static void after_untag(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    /* distance from the start of the frame to the end of the closing tag */
    const gint   span   = (tok->offset - top->start_offset) + tok->len;

    proto_item_set_len(top->item, span);
    top->length = span;

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        /* never pop the root frame */
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, frames->len - 1);
}
612 
/*
 * tvbparse callback for the start of a DOCTYPE declaration.
 *
 * Adds an hf_doctype item located at the doctype name but labelled with the
 * text of the whole token, opens an ett_dtd subtree for it, and pushes a new
 * XML_FRAME_DTD_DOCTYPE frame named after the doctype on the stack.
 */
static void before_dtd_doctype(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames   = (GPtrArray *)tvbparse_data;
    xml_frame_t     *top      = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    /* the name sits two levels below the fourth element of the sequence */
    tvbparse_elem_t *name_tok = tok->sub->next->next->next->sub->sub;
    proto_tree      *dtd_item;
    xml_frame_t     *dtd_frame;

    dtd_item = proto_tree_add_item(top->tree, hf_doctype,
                                   name_tok->tvb, name_tok->offset, name_tok->len,
                                   ENC_ASCII|ENC_NA);
    proto_item_set_text(dtd_item, "%s",
                        tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    dtd_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    dtd_frame->type           = XML_FRAME_DTD_DOCTYPE;
    dtd_frame->name           = (gchar *)tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb,
                                                            name_tok->offset, name_tok->len,
                                                            ENC_ASCII);
    dtd_frame->name_orig_case = dtd_frame->name;
    dtd_frame->value          = NULL;
    insert_xml_frame(top, dtd_frame);
    dtd_frame->item           = dtd_item;
    dtd_frame->last_item      = dtd_item;
    dtd_frame->tree           = proto_item_add_subtree(dtd_item, ett_dtd);
    dtd_frame->pinfo          = top->pinfo;
    dtd_frame->ns             = NULL;
    dtd_frame->start_offset   = tok->offset;
    dtd_frame->length         = tok->len;

    g_ptr_array_add(frames, dtd_frame);
}
643 
/*
 * tvbparse callback that pops the current frame off the stack.
 *
 * The root frame is never popped: if it is the only frame left, an expert
 * info covering the token is added to its tree instead.
 *
 * tok is not marked _U_ because the error path reads its tvb, offset and
 * length.
 */
static void pop_stack(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);

    if (stack->len > 1) {
        g_ptr_array_remove_index_fast(stack, stack->len - 1);
    } else {
        proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_closing_unopened_tag,
            tok->tvb, tok->offset, tok->len);
    }
}
656 
/*
 * tvbparse callback for the token that closes a DTD construct.
 *
 * Adds the token text to the current frame's tree and pops that frame.  If
 * only the root frame is on the stack, an expert info is added instead of
 * popping.
 */
static void after_dtd_close(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *frames = (GPtrArray *)tvbparse_data;
    xml_frame_t *top    = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);

    proto_tree_add_format_text(top->tree, tok->tvb, tok->offset, tok->len);

    if (frames->len <= 1) {
        /* never pop the root frame */
        proto_tree_add_expert(top->tree, top->pinfo, &ei_xml_closing_unopened_tag,
                              tok->tvb, tok->offset, tok->len);
        return;
    }

    g_ptr_array_remove_index_fast(frames, frames->len - 1);
}
670 
/*
 * tvbparse callback for an attribute value: stores the token's sub-element in
 * tok->data, which is where after_attrib() picks the value up.
 */
static void
get_attrib_value(void *tvbparse_data _U_, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    tvbparse_elem_t *matched = tok->sub;

    tok->data = matched;
}
675 
/*
 * tvbparse callback for a complete attribute (name '=' value).
 *
 * Adds an item for the attribute to the current frame's tree, labelled with
 * the text of the whole attribute, and appends an XML_FRAME_ATTRIB frame to
 * the current frame's children.  Nothing is pushed on the stack.
 *
 * If the lower-cased attribute name is known to the current namespace, its
 * header field is used and the item covers just the value; otherwise
 * hf_unknowwn_attrib is used and the item covers the whole attribute.  The
 * frame's value tvb always covers just the value.
 */
static void after_attrib(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray       *frames     = (GPtrArray *)tvbparse_data;
    xml_frame_t     *top        = (xml_frame_t *)g_ptr_array_index(frames, frames->len - 1);
    tvbparse_elem_t *name_tok   = tok->sub;
    /* third element of the sequence; its data was set by get_attrib_value() */
    tvbparse_elem_t *value_part = (tvbparse_elem_t *)name_tok->next->next->data;
    tvbparse_elem_t *shown      = tok;
    int              hfid       = hf_unknowwn_attrib;
    gchar           *name;
    gchar           *name_orig_case;
    proto_item      *pi;
    xml_frame_t     *attr_frame;

    name           = tvb_get_string_enc(wmem_packet_scope(), name_tok->tvb, name_tok->offset, name_tok->len, ENC_ASCII);
    name_orig_case = wmem_strdup(wmem_packet_scope(), name);
    ascii_strdown_inplace(name);

    if (top->ns) {
        int *known_hfid = (int *)wmem_map_lookup(top->ns->attributes, name);

        if (known_hfid) {
            hfid  = *known_hfid;
            shown = value_part;
        }
    }

    pi = proto_tree_add_item(top->tree, hfid, shown->tvb, shown->offset, shown->len, ENC_UTF_8|ENC_NA);
    proto_item_set_text(pi, "%s", tvb_format_text(wmem_packet_scope(), tok->tvb, tok->offset, tok->len));

    top->last_item = pi;

    attr_frame                 = wmem_new(wmem_packet_scope(), xml_frame_t);
    attr_frame->type           = XML_FRAME_ATTRIB;
    attr_frame->name           = name;
    attr_frame->name_orig_case = name_orig_case;
    attr_frame->value          = tvb_new_subset_length(value_part->tvb, value_part->offset,
                                                       value_part->len);
    insert_xml_frame(top, attr_frame);
    attr_frame->pinfo          = top->pinfo;
    attr_frame->ns             = NULL;
    attr_frame->tree           = NULL;
    attr_frame->item           = pi;
    attr_frame->last_item      = pi;
    attr_frame->start_offset   = tok->offset;
    attr_frame->length         = tok->len;
}
721 
/*
 * tvbparse callback for text the grammar does not recognize: adds an
 * ei_xml_unrecognized_text expert info covering the token to the current
 * frame's tree.
 *
 * tok is not marked _U_ because its tvb, offset and length are read here.
 */
static void unrecognized_token(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok)
{
    GPtrArray   *stack         = (GPtrArray *)tvbparse_data;
    xml_frame_t *current_frame = (xml_frame_t *)g_ptr_array_index(stack, stack->len - 1);

    proto_tree_add_expert(current_frame->tree, current_frame->pinfo, &ei_xml_unrecognized_text,
                    tok->tvb, tok->offset, tok->len);

}
731 
732 
733 
/*
 * Build the tvbparse grammar used to tokenise XML.
 *
 * The function only constructs "wanted" descriptions; nothing is parsed
 * here.  On return three file-level variables are set:
 *   - want_ignore: the characters " \t\r\n",
 *   - want:        the full set of alternatives (including the catch-all
 *                  rules for character data and unrecognised text),
 *   - want_heur:   a reduced set without closing tags, DTD close and the
 *                  catch-all rules (presumably for the heuristic check --
 *                  confirm against the caller).
 * The callbacks named in each rule (before_tag, after_attrib, ...) are
 * defined elsewhere in this file.
 */
static void init_xml_parser(void)
{
    /* A name: letters, digits, '.', '-' and '_'. */
    tvbparse_wanted_t *want_name =
        tvbparse_chars(-1, 1, 0,
                   "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
                   NULL, NULL, NULL);
    /* An attribute name: same character set as a name, plus ':'. */
    tvbparse_wanted_t *want_attr_name =
        tvbparse_chars(-1, 1, 0,
                   "abcdefghijklmnopqrstuvwxyz.-_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789:",
                   NULL, NULL, NULL);

    /* name ':' name, tagged with the XML_SCOPED_NAME id. */
    tvbparse_wanted_t *want_scoped_name = tvbparse_set_seq(XML_SCOPED_NAME, NULL, NULL, NULL,
                                   want_name,
                                   tvbparse_char(-1, ":", NULL, NULL, NULL),
                                   want_name,
                                   NULL);

    /* A tag name is either a scoped name or a plain name (scoped listed first). */
    tvbparse_wanted_t *want_tag_name = tvbparse_set_oneof(0, NULL, NULL, NULL,
                                  want_scoped_name,
                                  want_name,
                                  NULL);

    /*
     * An attribute value: a "..." or '...' quoted string (both built with
     * '\\' as the last argument and tvbparse_shrink_token_cb as callback),
     * a run of digits, or a bare name.  get_attrib_value is the callback
     * for the alternative as a whole.
     */
    tvbparse_wanted_t *want_attrib_value = tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
                                  tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\"', '\\'),
                                  tvbparse_quoted(-1, NULL, NULL, tvbparse_shrink_token_cb, '\'', '\\'),
                                  tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL),
                                  want_name,
                                  NULL);

    /* One or more attr_name '=' value groups; after_attrib is attached to each group. */
    tvbparse_wanted_t *want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
                                  tvbparse_set_seq(-1, NULL, NULL, after_attrib,
                                           want_attr_name,
                                           tvbparse_char(-1, "=", NULL, NULL, NULL),
                                           want_attrib_value,
                                           NULL));

    /* End of a start tag: ">" (after_open_tag) or "/>" (after_closed_tag). */
    tvbparse_wanted_t *want_stoptag = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                 tvbparse_char(-1, ">", NULL, NULL, after_open_tag),
                                 tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
                                 NULL);

    /* End of a processing instruction: "?>" (after_xmlpi). */
    tvbparse_wanted_t *want_stopxmlpi = tvbparse_string(-1, "?>", NULL, NULL, after_xmlpi);

    /* A comment: "<!--" up to and including "-->", tagged with hf_comment. */
    tvbparse_wanted_t *want_comment = tvbparse_set_seq(hf_comment, NULL, NULL, after_token,
                               tvbparse_string(-1, "<!--", NULL, NULL, NULL),
                               tvbparse_until(-1, NULL, NULL, NULL,
                                      tvbparse_string(-1, "-->", NULL, NULL, NULL),
                                      TP_UNTIL_INCLUDE),
                               NULL);

    /* A CDATA section: "<![CDATA[" up to and including "]]>", tagged with hf_cdatasection. */
    tvbparse_wanted_t *want_cdatasection = tvbparse_set_seq(hf_cdatasection, NULL, NULL, after_token,
                               tvbparse_string(-1, "<![CDATA[", NULL, NULL, NULL),
                               tvbparse_until(-1, NULL, NULL, NULL,
                                       tvbparse_string(-1, "]]>", NULL, NULL, NULL),
                                       TP_UNTIL_INCLUDE),
                                NULL);

    /*
     * A processing instruction: "<?" name, then either "?>" directly or
     * attributes followed by "?>".  Tagged with hf_xmlpi; before_xmpli is
     * the callback on the sequence.
     */
    tvbparse_wanted_t *want_xmlpi = tvbparse_set_seq(hf_xmlpi, NULL, before_xmpli, NULL,
                             tvbparse_string(-1, "<?", NULL, NULL, NULL),
                             want_name,
                             tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                        want_stopxmlpi,
                                        tvbparse_set_seq(-1, NULL, NULL, NULL,
                                                 want_attributes,
                                                 want_stopxmlpi,
                                                 NULL),
                                        NULL),
                             NULL);

    /* A closing tag: '<' '/' tag_name '>' (after_untag). */
    tvbparse_wanted_t *want_closing_tag = tvbparse_set_seq(0, NULL, NULL, after_untag,
                                   tvbparse_char(-1, "<", NULL, NULL, NULL),
                                   tvbparse_char(-1, "/", NULL, NULL, NULL),
                                   want_tag_name,
                                   tvbparse_char(-1, ">", NULL, NULL, NULL),
                                   NULL);

    /*
     * Start of a doctype declaration: '<' '!' "DOCTYPE" (case-insensitive
     * matcher), followed by either
     *   - name '[' (an internal subset is opened), or
     *   - name, "PUBLIC" or "SYSTEM", then everything up to and including
     *     '>' (pop_stack is the callback on this alternative).
     * before_dtd_doctype is the callback on the whole sequence.
     */
    tvbparse_wanted_t *want_doctype_start = tvbparse_set_seq(-1, NULL, before_dtd_doctype, NULL,
                                 tvbparse_char(-1, "<", NULL, NULL, NULL),
                                 tvbparse_char(-1, "!", NULL, NULL, NULL),
                                 tvbparse_casestring(-1, "DOCTYPE", NULL, NULL, NULL),
                                 tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                            tvbparse_set_seq(-1, NULL, NULL, NULL,
                                                     want_name,
                                                     tvbparse_char(-1, "[", NULL, NULL, NULL),
                                                     NULL),
                                            tvbparse_set_seq(-1, NULL, NULL, pop_stack,
                                                     want_name,
                                                     tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                                            tvbparse_casestring(-1, "PUBLIC", NULL, NULL, NULL),
                                                            tvbparse_casestring(-1, "SYSTEM", NULL, NULL, NULL),
                                                            NULL),
                                                     tvbparse_until(-1, NULL, NULL, NULL,
                                                            tvbparse_char(-1, ">", NULL, NULL, NULL),
                                                            TP_UNTIL_INCLUDE),
                                                     NULL),
                                            NULL),
                                 NULL);

    /* Any other "<!" ... ">" construct, tagged with hf_dtd_tag. */
    tvbparse_wanted_t *want_dtd_tag = tvbparse_set_seq(hf_dtd_tag, NULL, NULL, after_token,
                               tvbparse_char(-1, "<", NULL, NULL, NULL),
                               tvbparse_char(-1, "!", NULL, NULL, NULL),
                               tvbparse_until(-1, NULL, NULL, NULL,
                                      tvbparse_char(-1, ">", NULL, NULL, NULL),
                                      TP_UNTIL_INCLUDE),
                               NULL);

    /*
     * A start (or self-closed) tag: '<' tag_name, then either attributes
     * followed by the tag end, or the tag end directly.  before_tag is the
     * callback on the sequence.
     */
    tvbparse_wanted_t *want_tag = tvbparse_set_seq(-1, NULL, before_tag, NULL,
                               tvbparse_char(-1, "<", NULL, NULL, NULL),
                               want_tag_name,
                               tvbparse_set_oneof(-1, NULL, NULL, NULL,
                                      tvbparse_set_seq(-1, NULL, NULL, NULL,
                                               want_attributes,
                                               want_stoptag,
                                               NULL),
                                      want_stoptag,
                                      NULL),
                               NULL);

    /* "]" ">" : closes a doctype internal subset (after_dtd_close). */
    tvbparse_wanted_t *want_dtd_close = tvbparse_set_seq(-1, NULL, NULL, after_dtd_close,
                                 tvbparse_char(-1, "]", NULL, NULL, NULL),
                                 tvbparse_char(-1, ">", NULL, NULL, NULL),
                                 NULL);

    /* Whitespace characters handed to the parser as the "ignore" set. */
    want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL);


    /*
     * The complete grammar.  The last two alternatives are catch-alls:
     * a run of anything but '<' (tagged XML_CDATA, after_token) and a run
     * of anything but whitespace (unrecognized_token).
     */
    want = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                  want_comment,
                  want_cdatasection,
                  want_xmlpi,
                  want_closing_tag,
                  want_doctype_start,
                  want_dtd_close,
                  want_dtd_tag,
                  want_tag,
                  tvbparse_not_chars(XML_CDATA, 1, 0, "<", NULL, NULL, after_token),
                  tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, unrecognized_token),
                  NULL);

    /* Reduced grammar: only constructs that start with markup. */
    want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
                       want_comment,
                       want_cdatasection,
                       want_xmlpi,
                       want_doctype_start,
                       want_dtd_tag,
                       want_tag,
                       NULL);

}
883 
884 
xml_new_namespace(wmem_map_t * hash,const gchar * name,...)885 static xml_ns_t *xml_new_namespace(wmem_map_t *hash, const gchar *name, ...)
886 {
887     xml_ns_t *ns = wmem_new(wmem_epan_scope(), xml_ns_t);
888     va_list   ap;
889     gchar    *attr_name;
890 
891     ns->name       = wmem_strdup(wmem_epan_scope(), name);
892     ns->hf_tag     = -1;
893     ns->hf_cdata   = -1;
894     ns->ett        = -1;
895     ns->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
896     ns->elements   = NULL;
897 
898     va_start(ap, name);
899 
900     while(( attr_name = va_arg(ap, gchar *) )) {
901         int *hfp = wmem_new(wmem_epan_scope(), int);
902         *hfp = -1;
903         wmem_map_insert(ns->attributes, wmem_strdup(wmem_epan_scope(), attr_name), hfp);
904     };
905 
906     va_end(ap);
907 
908     wmem_map_insert(hash, ns->name, ns);
909 
910     return ns;
911 }
912 
913 
/*
 * Append one FT_STRING / BASE_NONE field description to `hfs`.
 *
 * hfs   array of hf_register_info being collected
 * p_id  where the field's id is to be stored
 * name  display name of the field
 * fqn   filter abbreviation of the field
 *
 * The entry is copied into the array; `name` and `fqn` are stored as
 * pointers, not duplicated.
 */
static void add_xml_field(wmem_array_t *hfs, int *p_id, const gchar *name, const gchar *fqn)
{
    hf_register_info entry;

    entry.p_id = p_id;

    /* what the field is called */
    entry.hfinfo.name   = name;
    entry.hfinfo.abbrev = fqn;
    entry.hfinfo.blurb  = NULL;

    /* how the field is typed and shown */
    entry.hfinfo.type    = FT_STRING;
    entry.hfinfo.display = BASE_NONE;
    entry.hfinfo.strings = NULL;
    entry.hfinfo.bitmask = 0x0;

    HFILL_INIT(entry);

    wmem_array_append_one(hfs, entry);
}
930 
/*
 * wmem_map_foreach callback: add a field for one attribute.
 *
 * k  attribute name (gchar *)
 * v  pointer to the int receiving the field id
 * p  struct _attr_reg_data with the target array and the base name
 *
 * The field abbreviation is "<basename>.<attribute name>", allocated in
 * the epan scope.
 */
static void add_xml_attribute_names(gpointer k, gpointer v, gpointer p)
{
    struct _attr_reg_data *reg       = (struct _attr_reg_data *)p;
    gchar                 *attr_name = (gchar *)k;
    int                   *hf_id     = (int *)v;
    const gchar           *abbrev;

    abbrev = wmem_strconcat(wmem_epan_scope(), reg->basename, ".", attr_name, NULL);

    add_xml_field(reg->hf, hf_id, attr_name, abbrev);
}
938 
939 
/*
 * wmem_map_foreach callback: add the fields and subtree slot of one
 * processing-instruction namespace to the global hf_arr / ett_arr.
 *
 * v  the namespace (xml_ns_t *)
 * p  prefix string; the namespace's abbreviation is "<prefix>.<name>"
 *
 * The tag field uses that abbreviation as both display name and filter
 * name; each attribute of the namespace gets a field below it.
 */
static void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p)
{
    xml_ns_t              *pi_ns    = (xml_ns_t *)v;
    const gchar           *prefix   = (gchar *)p;
    const gchar           *abbrev   = wmem_strconcat(wmem_epan_scope(), prefix, ".", pi_ns->name, NULL);
    gint                  *ett_slot = &pi_ns->ett;
    struct _attr_reg_data  attr_reg;

    attr_reg.basename = abbrev;
    attr_reg.hf       = hf_arr;

    add_xml_field(hf_arr, &pi_ns->hf_tag, abbrev, abbrev);

    g_array_append_val(ett_arr, ett_slot);

    wmem_map_foreach(pi_ns->attributes, add_xml_attribute_names, &attr_reg);
}
957 
destroy_dtd_data(dtd_build_data_t * dtd_data)958 static void destroy_dtd_data(dtd_build_data_t *dtd_data)
959 {
960     g_free(dtd_data->proto_name);
961     g_free(dtd_data->media_type);
962     g_free(dtd_data->description);
963     g_free(dtd_data->proto_root);
964 
965     g_string_free(dtd_data->error, TRUE);
966 
967     while(dtd_data->elements->len) {
968         dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->elements, 0);
969         g_ptr_array_free(nl->list, TRUE);
970         g_free(nl->name);
971         g_free(nl);
972     }
973 
974     g_ptr_array_free(dtd_data->elements, TRUE);
975 
976     while(dtd_data->attributes->len) {
977         dtd_named_list_t *nl = (dtd_named_list_t *)g_ptr_array_remove_index_fast(dtd_data->attributes, 0);
978         g_ptr_array_free(nl->list, TRUE);
979         g_free(nl->name);
980         g_free(nl);
981     }
982 
983     g_ptr_array_free(dtd_data->attributes, TRUE);
984 
985     g_free(dtd_data);
986 }
987 
/*
 * wmem_map_foreach callback: copy one attribute entry into another map.
 *
 * k  attribute name; duplicated in the epan scope
 * p  destination map (wmem_map_t *)
 *
 * The source value is ignored: the copy always gets a new int set to -1.
 */
static void copy_attrib_item(gpointer k, gpointer v _U_, gpointer p)
{
    wmem_map_t *target   = (wmem_map_t *)p;
    gchar      *name_dup = (gchar *)wmem_strdup(wmem_epan_scope(), (const gchar *)k);
    int        *fresh_id = wmem_new(wmem_epan_scope(), int);

    *fresh_id = -1;

    wmem_map_insert(target, name_dup, fresh_id);
}
998 
/*
 * Build a new epan-scope attribute map holding the same attribute names
 * as `src`; every entry of the result gets its own id slot (see
 * copy_attrib_item).  Returns the new map.
 */
static wmem_map_t *copy_attributes_hash(wmem_map_t *src)
{
    wmem_map_t *copy;

    copy = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    wmem_map_foreach(src, copy_attrib_item, copy);

    return copy;
}
1007 
/*
 * Make a fresh namespace modelled on `orig`.
 *
 * The copy has its own name string, its own attribute map (same names,
 * new id slots), an empty elements map and unset (-1) hf/ett ids.  Its
 * element_names array is new, but the entries are the same string
 * pointers as in orig->element_names (they are not duplicated).
 * The fqn member is not set here; callers assign it.
 */
static xml_ns_t *duplicate_element(xml_ns_t *orig)
{
    xml_ns_t  *copy  = wmem_new(wmem_epan_scope(), xml_ns_t);
    GPtrArray *names = g_ptr_array_new();
    guint      idx   = 0;

    copy->name       = wmem_strdup(wmem_epan_scope(), orig->name);
    copy->hf_tag     = -1;
    copy->hf_cdata   = -1;
    copy->ett        = -1;
    copy->attributes = copy_attributes_hash(orig->attributes);
    copy->elements   = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);

    /* shallow copy of the child-name list */
    while (idx < orig->element_names->len) {
        g_ptr_array_add(names, g_ptr_array_index(orig->element_names, idx));
        idx++;
    }
    copy->element_names = names;

    return copy;
}
1028 
fully_qualified_name(GPtrArray * hier,gchar * name,gchar * proto_name)1029 static gchar *fully_qualified_name(GPtrArray *hier, gchar *name, gchar *proto_name)
1030 {
1031     guint    i;
1032     wmem_strbuf_t *s = wmem_strbuf_new(wmem_epan_scope(), proto_name);
1033 
1034     wmem_strbuf_append(s, ".");
1035 
1036     for (i = 1; i < hier->len; i++) {
1037         wmem_strbuf_append_printf(s, "%s.", (gchar *)g_ptr_array_index(hier, i));
1038     }
1039 
1040     wmem_strbuf_append(s, name);
1041 
1042     return wmem_strbuf_finalize(s);;
1043 }
1044 
1045 
/*
 * Recursively build the namespace for element `elem_name` and its
 * children.
 *
 * elem_name   element to instantiate
 * root        root namespace; an element with the root's name is not built
 * elements    map of the element definitions (name -> xml_ns_t *)
 * hier        names of the elements currently being built (ancestors);
 *             restored to its incoming contents before returning
 * error       receives a message when elem_name has no definition
 * hfs, etts   collect the field and subtree registrations
 * proto_name  prefix for the fully qualified field names
 *
 * Returns the new namespace, or NULL when elem_name is the root's name,
 * is undefined, or already appears in hier (which would recurse forever).
 */
static xml_ns_t *make_xml_hier(gchar      *elem_name,
                               xml_ns_t   *root,
                               wmem_map_t *elements,
                               GPtrArray  *hier,
                               GString    *error,
                               wmem_array_t *hfs,
                               GArray     *etts,
                               char       *proto_name)
{
    xml_ns_t              *definition;
    xml_ns_t              *node;
    gchar                 *abbrev;
    gint                  *ett_slot;
    guint                  depth;
    guint                  child_ix;
    struct _attr_reg_data  attr_reg;

    if (g_str_equal(elem_name, root->name)) {
        return NULL;
    }

    definition = (xml_ns_t *)wmem_map_lookup(elements, elem_name);
    if (definition == NULL) {
        g_string_append_printf(error, "element '%s' is not defined\n", elem_name);
        return NULL;
    }

    /* an element that is its own ancestor is not expanded again */
    for (depth = 0; depth < hier->len; depth++) {
        if (strcmp(elem_name, (gchar *)g_ptr_array_index(hier, depth)) == 0) {
            return NULL;
        }
    }

    abbrev = fully_qualified_name(hier, elem_name, proto_name);

    node      = duplicate_element(definition);
    node->fqn = abbrev;

    add_xml_field(hfs, &(node->hf_tag), wmem_strdup(wmem_epan_scope(), elem_name), abbrev);
    add_xml_field(hfs, &(node->hf_cdata), wmem_strdup(wmem_epan_scope(), elem_name), abbrev);

    ett_slot = &node->ett;
    g_array_append_val(etts, ett_slot);

    attr_reg.basename = abbrev;
    attr_reg.hf       = hfs;
    wmem_map_foreach(node->attributes, add_xml_attribute_names, &attr_reg);

    /* this element is an ancestor of every child built below */
    g_ptr_array_add(hier, elem_name);

    for (child_ix = 0; child_ix < node->element_names->len; child_ix++) {
        gchar    *child_name = (gchar *)g_ptr_array_index(node->element_names, child_ix);
        xml_ns_t *child      = make_xml_hier(child_name, root, elements, hier,
                                             error, hfs, etts, proto_name);

        if (child != NULL) {
            wmem_map_insert(node->elements, child->name, child);
        }
    }

    g_ptr_array_remove_index_fast(hier, hier->len - 1);

    /* the name list was only needed while building */
    g_ptr_array_free(node->element_names, TRUE);
    node->element_names = NULL;

    return node;
}
1115 
/*
 * wmem_map_foreach callback: release the child-name list of one element.
 *
 * v  the element (xml_ns_t *); every string in its element_names array
 *    is g_free()d and then the array itself is freed.  The element's
 *    element_names member is left pointing at the freed array.
 */
static void free_elements(gpointer k _U_, gpointer v, gpointer p _U_)
{
    GPtrArray *names = ((xml_ns_t *)v)->element_names;
    guint      idx;

    for (idx = 0; idx < names->len; idx++) {
        g_free(g_ptr_array_index(names, idx));
    }

    g_ptr_array_free(names, TRUE);
}
1126 
/*
 * Turn the result of parsing one DTD into a namespace and hook it into
 * the XML dissector.
 *
 * dtd_data  parser output; ownership is taken and it is destroyed before
 *           returning (on every path)
 * errors    receives one line per problem found while registering
 *
 * Steps:
 *   1. build an element definition for every element in dtd_data,
 *   2. attach the declared attributes to their elements,
 *   3. pick the root (dtd_data->proto_root, else the first element),
 *   4. build either a recursive or a flat namespace below the root,
 *   5. if a proto_name was given, register the namespace as a protocol
 *      of its own (and map its media type, if any); otherwise its fields
 *      are added to the global hf_arr / ett_arr,
 *   6. insert the root into xml_ns.elements.
 */
static void register_dtd(dtd_build_data_t *dtd_data, GString *errors)
{
    wmem_map_t   *elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    gchar        *root_name     = NULL;
    xml_ns_t     *root_element  = NULL;
    wmem_array_t *hfs;
    GArray       *etts;
    GPtrArray    *hier;
    gchar        *curr_name;
    GPtrArray    *element_names = g_ptr_array_new();

    /* we first populate elements with those coming from the parser */
    while (dtd_data->elements->len) {
        dtd_named_list_t *nl      = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->elements, 0);
        xml_ns_t         *element = wmem_new(wmem_epan_scope(), xml_ns_t);

        /* we will use the first element found as root in case no other one was given. */
        if (root_name == NULL)
            root_name = wmem_strdup(wmem_epan_scope(), nl->name);

        element->name          = wmem_strdup(wmem_epan_scope(), nl->name);
        element->fqn           = NULL;
        element->element_names = nl->list;   /* ownership of the list moves to the element */
        element->hf_tag        = -1;
        element->hf_cdata      = -1;
        element->ett           = -1;
        element->attributes    = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
        element->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);

        if (wmem_map_lookup(elements, element->name)) {
            g_string_append_printf(errors, "element %s defined more than once\n", element->name);
            free_elements(NULL, element, NULL);
        } else {
            wmem_map_insert(elements, element->name, element);
            g_ptr_array_add(element_names, wmem_strdup(wmem_epan_scope(), element->name));
        }

        g_free(nl->name);
        g_free(nl);
    }

    /* then we add the attributes to their respective elements */
    while (dtd_data->attributes->len) {
        dtd_named_list_t *nl      = (dtd_named_list_t *)g_ptr_array_remove_index(dtd_data->attributes, 0);
        xml_ns_t         *element = (xml_ns_t *)wmem_map_lookup(elements, nl->name);

        if (element) {
            while (nl->list->len) {
                gchar *name = (gchar *)g_ptr_array_remove_index(nl->list, 0);
                int   *id_p = wmem_new(wmem_epan_scope(), int);

                *id_p = -1;
                wmem_map_insert(element->attributes, wmem_strdup(wmem_epan_scope(), name), id_p);
                g_free(name);
            }
        } else {
            g_string_append_printf(errors, "element %s is not defined\n", nl->name);
        }

        g_free(nl->name);
        g_ptr_array_free(nl->list, TRUE);
        g_free(nl);
    }

    /* if a proto_root is defined in the dtd we'll use that as root */
    if (dtd_data->proto_root) {
        wmem_free(wmem_epan_scope(), root_name);
        root_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_root);
    }

    /*
     * Without any element and without a proto_root there is nothing to
     * name the namespace after; report it instead of carrying a NULL
     * root name into the code below.
     */
    if (root_name == NULL) {
        g_string_append(errors, "no elements are defined and no proto_root was given\n");
        g_ptr_array_free(element_names, TRUE);
        destroy_dtd_data(dtd_data);
        return;
    }

    /* we use a stack with the names to avoid recurring infinitely */
    hier = g_ptr_array_new();

    /*
     * if a proto name was given in the dtd the dtd will be used as a protocol
     * or else the dtd will be loaded as a branch of the xml namespace
     */
    if (!dtd_data->proto_name) {
        hfs  = hf_arr;
        etts = ett_arr;
        g_ptr_array_add(hier, wmem_strdup(wmem_epan_scope(), "xml"));
    } else {
        /*
         * if we were given a proto_name the namespace will be registered
         * as an independent protocol with its own hf and ett arrays.
         */
        hfs  = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
        etts = g_array_new(FALSE, FALSE, sizeof(gint *));
    }

    /* the root element of the dtd's namespace */
    root_element = wmem_new(wmem_epan_scope(), xml_ns_t);
    root_element->name          = wmem_strdup(wmem_epan_scope(), root_name);
    root_element->fqn           = dtd_data->proto_name ? wmem_strdup(wmem_epan_scope(), dtd_data->proto_name) : root_element->name;
    root_element->hf_tag        = -1;
    root_element->hf_cdata      = -1;
    root_element->ett           = -1;
    root_element->elements      = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
    root_element->element_names = element_names;

    /*
     * we can either create a namespace as a flat namespace
     * in which all the elements are at the root level
     * or we can create a recursive namespace
     */
    if (dtd_data->recursion) {
        xml_ns_t *orig_root;

        /*
         * NOTE(review): make_xml_hier() returns NULL straight away when
         * asked for an element carrying the root's own name, so this call
         * has no effect as written; it is kept from the original code.
         */
        make_xml_hier(root_name, root_element, elements, hier, errors, hfs, etts, dtd_data->proto_name);

        /* the root contains itself, so nested occurrences resolve to it */
        wmem_map_insert(root_element->elements, (gpointer)root_element->name, root_element);

        orig_root = (xml_ns_t *)wmem_map_lookup(elements, root_name);

        /* if the root element was defined copy its attrlist to the child */
        if (orig_root) {
            struct _attr_reg_data d;

            /*
             * Use the root's fqn as prefix: it equals proto_name when one
             * was given and is still a valid string when proto_name is
             * NULL, which must not be used to build a field name.
             */
            d.basename = root_element->fqn;
            d.hf = hfs;

            root_element->attributes = copy_attributes_hash(orig_root->attributes);
            wmem_map_foreach(root_element->attributes, add_xml_attribute_names, &d);
        } else {
            root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
        }

        /* we then create all the sub hierarchies to catch the recurred cases */
        g_ptr_array_add(hier, root_name);

        while (root_element->element_names->len) {
            curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);

            if (!wmem_map_lookup(root_element->elements, curr_name)) {
                xml_ns_t *fresh = make_xml_hier(curr_name, root_element, elements, hier, errors,
                                              hfs, etts, dtd_data->proto_name);

                /*
                 * make_xml_hier() yields NULL when it refuses to build the
                 * element (e.g. its name is already on the hier stack);
                 * there is nothing to insert in that case.
                 */
                if (fresh) {
                    wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
                }
            }
        }

    } else {
        /* a flat namespace */
        g_ptr_array_add(hier, root_name);

        root_element->attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);

        while (root_element->element_names->len) {
            xml_ns_t *fresh;
            gint *ett_p;
            struct _attr_reg_data d;

            curr_name = (gchar *)g_ptr_array_remove_index(root_element->element_names, 0);
            /* curr_name was only added to element_names together with its definition */
            fresh       = duplicate_element((xml_ns_t *)wmem_map_lookup(elements, curr_name));
            fresh->fqn  = fully_qualified_name(hier, curr_name, root_name);

            add_xml_field(hfs, &(fresh->hf_tag), curr_name, fresh->fqn);
            add_xml_field(hfs, &(fresh->hf_cdata), curr_name, fresh->fqn);

            d.basename = fresh->fqn;
            d.hf = hfs;

            wmem_map_foreach(fresh->attributes, add_xml_attribute_names, &d);

            ett_p = &fresh->ett;
            g_array_append_val(etts, ett_p);

            /* children are not expanded in a flat namespace */
            g_ptr_array_free(fresh->element_names, TRUE);
            fresh->element_names = NULL;

            wmem_map_insert(root_element->elements, (gpointer)fresh->name, fresh);
        }
    }

    /* the root outlives this function; don't leave it a dangling list */
    g_ptr_array_free(element_names, TRUE);
    root_element->element_names = NULL;

    g_ptr_array_free(hier, TRUE);

    /*
     * if we were given a proto_name the namespace will be registered
     * as an independent protocol.
     */
    if (dtd_data->proto_name) {
        gint *ett_p;
        gchar *full_name, *short_name;

        if (dtd_data->description) {
            full_name = wmem_strdup(wmem_epan_scope(), dtd_data->description);
        } else {
            full_name = wmem_strdup(wmem_epan_scope(), root_name);
        }
        short_name = wmem_strdup(wmem_epan_scope(), dtd_data->proto_name);

        ett_p = &root_element->ett;
        g_array_append_val(etts, ett_p);

        add_xml_field(hfs, &root_element->hf_cdata, root_element->name, root_element->fqn);

        root_element->hf_tag = proto_register_protocol(full_name, short_name, short_name);
        proto_register_field_array(root_element->hf_tag, (hf_register_info*)wmem_array_get_raw(hfs), wmem_array_get_count(hfs));
        proto_register_subtree_array((gint **)g_array_data(etts), etts->len);

        if (dtd_data->media_type) {
            gchar* media_type = wmem_strdup(wmem_epan_scope(), dtd_data->media_type);
            wmem_map_insert(media_types, media_type, root_element);
        }

        /* this array was private to the protocol (see above) */
        g_array_free(etts, TRUE);
    }

    wmem_map_insert(xml_ns.elements, root_element->name, root_element);

    /* the definitions were only templates; release their name lists */
    wmem_map_foreach(elements, free_elements, NULL);

    destroy_dtd_data(dtd_data);
    wmem_free(wmem_epan_scope(), root_name);
}
1340 
/*
 * Thin wrappers around GLib's directory API used by init_xml_names().
 * OPENDIR_OP() expects a variable named "dummy" (GError **) to be in
 * scope at the point of use.  Each macro expands to an expression, so
 * none of them carries a trailing semicolon.
 */
#  define DIRECTORY_T GDir
#  define FILE_T gchar
#  define OPENDIR_OP(name) g_dir_open((name), 0, dummy)
#  define DIRGETNEXT_OP(dir) g_dir_read_name(dir)
#  define GETFNAME_OP(file) (file)
#  define CLOSEDIR_OP(dir) g_dir_close(dir)
1347 
init_xml_names(void)1348 static void init_xml_names(void)
1349 {
1350     guint         i;
1351     DIRECTORY_T  *dir;
1352     const FILE_T *file;
1353     const gchar  *filename;
1354     gchar        *dirname;
1355 
1356     GError **dummy = wmem_new(wmem_epan_scope(), GError *);
1357     *dummy = NULL;
1358 
1359     xmpli_names = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1360     media_types = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1361 
1362     unknown_ns.elements = xml_ns.elements = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1363     unknown_ns.attributes = xml_ns.attributes = wmem_map_new(wmem_epan_scope(), g_str_hash, g_str_equal);
1364 
1365     xml_new_namespace(xmpli_names, "xml", "version", "encoding", "standalone", NULL);
1366 
1367     dirname = get_persconffile_path("dtds", FALSE);
1368 
1369     if (test_for_directory(dirname) != EISDIR) {
1370         /* Although dir isn't a directory it may still use memory */
1371         g_free(dirname);
1372         dirname = get_datafile_path("dtds");
1373     }
1374 
1375     if (test_for_directory(dirname) == EISDIR) {
1376         if ((dir = OPENDIR_OP(dirname)) != NULL) {
1377             GString *errors = g_string_new("");
1378 
1379             while ((file = DIRGETNEXT_OP(dir)) != NULL) {
1380                 guint namelen;
1381                 filename = GETFNAME_OP(file);
1382 
1383                 namelen = (int)strlen(filename);
1384                 if ( namelen > 4 && ( g_ascii_strcasecmp(filename+(namelen-4), ".dtd")  == 0 ) ) {
1385                     GString *preparsed;
1386                     dtd_build_data_t *dtd_data;
1387 
1388                     g_string_truncate(errors, 0);
1389                     preparsed = dtd_preparse(dirname, filename, errors);
1390 
1391                     if (errors->len) {
1392                         report_failure("Dtd Preparser in file %s%c%s: %s",
1393                                        dirname, G_DIR_SEPARATOR, filename, errors->str);
1394                         continue;
1395                     }
1396 
1397                     dtd_data = dtd_parse(preparsed);
1398 
1399                     g_string_free(preparsed, TRUE);
1400 
1401                     if (dtd_data->error->len) {
1402                         report_failure("Dtd Parser in file %s%c%s: %s",
1403                                        dirname, G_DIR_SEPARATOR, filename, dtd_data->error->str);
1404                         destroy_dtd_data(dtd_data);
1405                         continue;
1406                     }
1407 
1408                     register_dtd(dtd_data, errors);
1409 
1410                     if (errors->len) {
1411                         report_failure("Dtd Registration in file: %s%c%s: %s",
1412                                        dirname, G_DIR_SEPARATOR, filename, errors->str);
1413                         continue;
1414                     }
1415                 }
1416             }
1417             g_string_free(errors, TRUE);
1418 
1419             CLOSEDIR_OP(dir);
1420         }
1421     }
1422 
1423     g_free(dirname);
1424 
1425     for(i=0;i<array_length(default_media_types);i++) {
1426         if( ! wmem_map_lookup(media_types, default_media_types[i]) ) {
1427             wmem_map_insert(media_types, (gpointer)default_media_types[i], &xml_ns);
1428         }
1429     }
1430 
1431     wmem_map_foreach(xmpli_names, add_xmlpi_namespace, (gpointer)"xml.xmlpi");
1432 
1433     wmem_free(wmem_epan_scope(), dummy);
1434 }
1435 
1436 void
proto_register_xml(void)1437 proto_register_xml(void)
1438 {
1439     static gint *ett_base[] = {
1440         &unknown_ns.ett,
1441         &xml_ns.ett,
1442         &ett_dtd,
1443         &ett_xmpli
1444     };
1445 
1446     static hf_register_info hf_base[] = {
1447         { &hf_xmlpi,
1448           {"XMLPI", "xml.xmlpi",
1449            FT_STRING, BASE_NONE, NULL, 0,
1450            NULL, HFILL }
1451         },
1452         { &hf_cdatasection,
1453           {"CDATASection", "xml.cdatasection",
1454            FT_STRING, BASE_NONE, NULL, 0,
1455            NULL, HFILL }
1456         },
1457         { &hf_comment,
1458           {"Comment", "xml.comment",
1459            FT_STRING, BASE_NONE, NULL, 0,
1460            NULL, HFILL }
1461         },
1462         { &hf_unknowwn_attrib,
1463           {"Attribute", "xml.attribute",
1464            FT_STRING, BASE_NONE, NULL, 0,
1465            NULL, HFILL }
1466         },
1467         { &hf_doctype,
1468           {"Doctype", "xml.doctype",
1469            FT_STRING, BASE_NONE, NULL, 0,
1470            NULL, HFILL }
1471         },
1472         { &hf_dtd_tag,
1473           {"DTD Tag", "xml.dtdtag",
1474            FT_STRING, BASE_NONE, NULL, 0,
1475            NULL, HFILL }
1476         },
1477         { &unknown_ns.hf_cdata,
1478           {"CDATA", "xml.cdata",
1479            FT_STRING, BASE_NONE, NULL, 0, NULL,
1480            HFILL }
1481         },
1482         { &unknown_ns.hf_tag,
1483           {"Tag", "xml.tag",
1484            FT_STRING, BASE_NONE, NULL, 0,
1485            NULL, HFILL }
1486         },
1487         { &xml_ns.hf_cdata,
1488           {"Unknown", "xml.unknown",
1489            FT_STRING, BASE_NONE, NULL, 0,
1490            NULL, HFILL }
1491         }
1492     };
1493 
1494     static ei_register_info ei[] = {
1495         { &ei_xml_closing_unopened_tag, { "xml.closing_unopened_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened tag", EXPFILL }},
1496         { &ei_xml_closing_unopened_xmpli_tag, { "xml.closing_unopened_xmpli_tag", PI_MALFORMED, PI_ERROR, "Closing an unopened xmpli tag", EXPFILL }},
1497         { &ei_xml_unrecognized_text, { "xml.unrecognized_text", PI_PROTOCOL, PI_WARN, "Unrecognized text", EXPFILL }},
1498     };
1499 
1500     module_t *xml_module;
1501     expert_module_t* expert_xml;
1502 
1503     hf_arr  = wmem_array_new(wmem_epan_scope(), sizeof(hf_register_info));
1504     ett_arr = g_array_new(FALSE, FALSE, sizeof(gint *));
1505 
1506     wmem_array_append(hf_arr, hf_base, array_length(hf_base));
1507     g_array_append_vals(ett_arr, ett_base, array_length(ett_base));
1508 
1509     init_xml_names();
1510 
1511     xml_ns.hf_tag = proto_register_protocol("eXtensible Markup Language", "XML", xml_ns.name);
1512 
1513     proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)wmem_array_get_raw(hf_arr), wmem_array_get_count(hf_arr));
1514     proto_register_subtree_array((gint **)g_array_data(ett_arr), ett_arr->len);
1515     expert_xml = expert_register_protocol(xml_ns.hf_tag);
1516     expert_register_field_array(expert_xml, ei, array_length(ei));
1517 
1518     xml_module = prefs_register_protocol(xml_ns.hf_tag, NULL);
1519     prefs_register_obsolete_preference(xml_module, "heuristic");
1520     prefs_register_obsolete_preference(xml_module, "heuristic_tcp");
1521     prefs_register_obsolete_preference(xml_module, "heuristic_udp");
1522     /* XXX - UCS-2, or UTF-16? */
1523     prefs_register_bool_preference(xml_module, "heuristic_unicode", "Use Unicode in heuristics",
1524                                    "Try to recognize XML encoded in Unicode (UCS-2BE)",
1525                                    &pref_heuristic_unicode);
1526 
1527     g_array_free(ett_arr, TRUE);
1528 
1529     xml_handle = register_dissector("xml", dissect_xml, xml_ns.hf_tag);
1530 
1531     init_xml_parser();
1532 }
1533 
1534 static void
add_dissector_media(gpointer k,gpointer v _U_,gpointer p _U_)1535 add_dissector_media(gpointer k, gpointer v _U_, gpointer p _U_)
1536 {
1537     dissector_add_string("media_type", (gchar *)k, xml_handle);
1538 }
1539 
1540 void
proto_reg_handoff_xml(void)1541 proto_reg_handoff_xml(void)
1542 {
1543     wmem_map_foreach(media_types, add_dissector_media, NULL);
1544     dissector_add_uint_range_with_preference("tcp.port", "", xml_handle);
1545 
1546     heur_dissector_add("http",  dissect_xml_heur, "XML in HTTP", "xml_http", xml_ns.hf_tag, HEURISTIC_DISABLE);
1547     heur_dissector_add("sip",   dissect_xml_heur, "XML in SIP", "xml_sip", xml_ns.hf_tag, HEURISTIC_DISABLE);
1548     heur_dissector_add("media", dissect_xml_heur, "XML in media", "xml_media", xml_ns.hf_tag, HEURISTIC_DISABLE);
1549     heur_dissector_add("tcp", dissect_xml_heur, "XML over TCP", "xml_tcp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1550     heur_dissector_add("udp", dissect_xml_heur, "XML over UDP", "xml_udp", xml_ns.hf_tag, HEURISTIC_DISABLE);
1551 
1552     heur_dissector_add("wtap_file", dissect_xml_heur, "XML file", "xml_wtap", xml_ns.hf_tag, HEURISTIC_ENABLE);
1553 
1554     dissector_add_uint("acdr.tls_application", TLS_APP_XML, xml_handle);
1555 }
1556 
1557 /*
1558  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
1559  *
1560  * Local variables:
1561  * c-basic-offset: 4
1562  * tab-width: 8
1563  * indent-tabs-mode: nil
1564  * End:
1565  *
1566  * vi: set shiftwidth=4 tabstop=8 expandtab:
1567  * :indentSize=4:tabSize=8:noTabs=true:
1568  */
1569