1"""
2Routines for reading PDML produced from TShark.
3
4Copyright (c) 2003, 2013 by Gilbert Ramirez <gram@alumni.rice.edu>
5
6SPDX-License-Identifier: GPL-2.0-or-later
7"""
8
9import sys
10import xml.sax
11from xml.sax.saxutils import quoteattr
12import cStringIO as StringIO
13
class CaptureFile:
    """Placeholder object for the top-level <pdml> element.

    It holds no data and has no add_child(); the SAX handler in this
    module creates one per document and never attaches packets to it.
    """
16
class FoundItException(Exception):
    """Raised to unwind a recursive tree search as soon as a match is found.

    Internal to this module: PacketList._item_exists() raises it and
    PacketList.item_exists() catches it.
    """
20
class PacketList:
    """List-like container of packet-tree items, with search helpers.

    Every child is expected to expose a 'name' attribute plus the
    _item_exists() and get_items() methods defined here, which is what
    lets the searches recurse through the whole tree.
    """

    def __init__(self, children=None):
        """Wrap the list 'children' (stored as-is, not copied).
        A fresh empty list is used when none is given."""
        self.children = [] if children is None else children

    def __getitem__(self, index):
        """Index into the direct children, like a list."""
        return self.children[index]

    def __len__(self):
        """Number of direct children."""
        return len(self.children)

    def item_exists(self, name):
        """Report whether an item called 'name' exists anywhere in this
        PacketList. Returns True or False."""
        # Cheap pass first: look at the direct children only.
        if any(node.name == name for node in self.children):
            return True

        # Then search the subtrees. A match is signalled by exception
        # so that the whole recursion unwinds in a single step.
        try:
            for node in self.children:
                node._item_exists(name)
        except FoundItException:
            return True

        return False

    def _item_exists(self, name):
        """Recursive worker for item_exists(): raises FoundItException on
        the first match and returns nothing otherwise."""
        for node in self.children:
            if node.name == name:
                raise FoundItException
            node._item_exists(name)

    def get_items(self, name, items=None):
        """Collect every item called 'name', in depth-first order.

        Callers leave 'items' unset and receive a new PacketList.
        Recursive calls pass the shared accumulator list as 'items';
        those calls append to it and return None."""
        is_root_call = items is None
        if is_root_call:
            items = []

        for node in self.children:
            if node.name == name:
                items.append(node)
            node.get_items(name, items)

        if is_root_call:
            return PacketList(items)

    def get_items_before(self, name, before_item, items=None):
        """Collect every item called 'name' that occurs before the object
        'before_item', in depth-first order.

        This finds fields from protocols that occur before another
        protocol. For example, given an HTTP protocol item you can find
        all tcp.dstport fields *before* it, which helps when analyzing
        tunneled protocols.

        The cut-off is applied to the direct children only: the scan
        stops at the first direct child equal to 'before_item'. As with
        get_items(), a PacketList is returned only when 'items' is left
        unset; otherwise matches are appended to 'items'."""
        is_root_call = items is None
        if is_root_call:
            items = []

        for node in self.children:
            if is_root_call and node == before_item:
                break
            if node.name == name:
                items.append(node)
            # Plain get_items() from here down, because 'before_item'
            # only limits the top-level scan.
            node.get_items(name, items)

        if is_root_call:
            return PacketList(items)
103
104
class ProtoTreeItem(PacketList):
    """One node of a packet tree, built from an XML element's attributes.

    The attributes name, showname, pos, size, value, show and hide are
    copied from the element; an attribute the element lacks is stored
    as the empty string.
    """

    def __init__(self, xmlattrs):
        """Read the known attributes from 'xmlattrs', which only has to
        offer a dict-style get(key, default)."""
        PacketList.__init__(self)

        for attr in ("name", "showname", "pos", "size",
                     "value", "show", "hide"):
            setattr(self, attr, xmlattrs.get(attr, ""))

    def add_child(self, child):
        """Append 'child' to this item's children."""
        self.children.append(child)

    def get_name(self):
        """Return the 'name' attribute."""
        return self.name

    def get_showname(self):
        """Return the 'showname' attribute."""
        return self.showname

    def get_pos(self):
        """Return the 'pos' attribute."""
        return self.pos

    def get_size(self):
        """Return the 'size' attribute."""
        return self.size

    def get_value(self):
        """Return the 'value' attribute."""
        return self.value

    def get_show(self):
        """Return the 'show' attribute."""
        return self.show

    def get_hide(self):
        """Return the 'hide' attribute."""
        return self.hide

    def dump(self, fh=sys.stdout):
        """Write the non-empty attributes to 'fh' as XML attribute text.

        Every print ends with a trailing comma, so nothing here ends the
        output line; the caller supplies the surrounding tag text."""
        # (format, value) pairs in output order. Only the first format
        # carries its own leading space.
        pairs = (
            (" name=%s", self.name),
            ("showname=%s", self.showname),
            ("pos=%s", self.pos),
            ("size=%s", self.size),
            ("value=%s", self.value),
            ("show=%s", self.show),
            ("hide=%s", self.hide),
        )

        for fmt, text in pairs:
            if text:
                print >> fh, fmt % (quoteattr(text),),
162
class Packet(ProtoTreeItem, PacketList):
    """A <packet> element: the root of one packet's protocol tree."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write this packet and all of its children to 'fh' as XML.

        'indent' is the nesting depth; each level adds two spaces.
        The packet's own attributes are not written."""
        print >> fh, "  " * indent, "<packet>"

        inner = indent + 1
        for item in self.children:
            item.dump(fh, inner)

        # NOTE: the closing tag is written at the children's indent
        # level, one deeper than the opening tag.
        print >> fh, "  " * inner, "</packet>"
170
171
class Protocol(ProtoTreeItem):
    """A <proto> element of the packet tree."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write this protocol, its attributes and its children to 'fh'
        as XML.

        'indent' is the nesting depth; each level adds two spaces."""
        prefix = "  " * indent
        print >> fh, "%s<proto " % (prefix,),

        # The attributes go on the same output line as the opening tag.
        ProtoTreeItem.dump(self, fh)

        print >> fh, '>'

        inner = indent + 1
        for item in self.children:
            item.dump(fh, inner)

        # NOTE: the closing tag is written at the children's indent
        # level, one deeper than the opening tag.
        print >> fh, "  " * inner, "</proto>"
185
186
class Field(ProtoTreeItem):
    """A <field> element of the packet tree."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write this field, its attributes and any children to 'fh' as
        XML.

        A field without children is written as a self-closing tag.
        'indent' is the nesting depth; each level adds two spaces."""
        prefix = "  " * indent
        print >> fh, "%s<field " % (prefix,),

        # The attributes go on the same output line as the opening tag.
        ProtoTreeItem.dump(self, fh)

        if not self.children:
            print >> fh, "/>"
            return

        print >> fh, ">"
        inner = indent + 1
        for item in self.children:
            item.dump(fh, inner)

        # NOTE: the closing tag is written at the children's indent
        # level, one deeper than the opening tag.
        print >> fh, "  " * inner, "</field>"
203
204
class ParseXML(xml.sax.handler.ContentHandler):
    """SAX content handler that builds one Packet tree at a time.

    Each completed Packet is handed to the callback given to the
    constructor. Packets are not attached to the CaptureFile element,
    so the handler itself does not accumulate them.
    """

    ELEMENT_FILE        = "pdml"
    ELEMENT_FRAME       = "packet"
    ELEMENT_PROTOCOL    = "proto"
    ELEMENT_FIELD       = "field"

    def __init__(self, cb):
        """Remember 'cb', the callable invoked with each finished Packet."""
        # Initialize the base class too, so that the state it sets up
        # exists on this instance.
        xml.sax.handler.ContentHandler.__init__(self)

        self.cb = cb
        self.chars = ""
        self.element_stack = []

    def startElement(self, name, xmlattrs):
        """Create the object for the element 'name' and push it on the
        element stack.

        An element name that is not one of the four known PDML elements
        ends the program through sys.exit()."""
        self.chars = ""

        if name == self.ELEMENT_FILE:
            # Eventually, we should check version number of pdml here
            elem = CaptureFile()

        elif name == self.ELEMENT_FRAME:
            elem = Packet(xmlattrs)

        elif name == self.ELEMENT_PROTOCOL:
            elem = Protocol(xmlattrs)

        elif name == self.ELEMENT_FIELD:
            elem = Field(xmlattrs)

        else:
            sys.exit("Unknown element: %s" % (name,))

        self.element_stack.append(elem)


    def endElement(self, name):
        """Pop the finished element, attach it to its parent where
        appropriate, and pass completed Packets to the callback."""
        elem = self.element_stack.pop()

        # Add element as child to previous element as long
        # as there is more than 1 element in the stack. Only
        # one element in the stack means that the element in
        # the stack is the single CaptureFile element, and we don't
        # want to add this element to that, as we only want one
        # Packet element in memory at a time.
        if len(self.element_stack) > 1:
            parent_elem = self.element_stack[-1]
            parent_elem.add_child(elem)

        self.chars = ""

        # If we just finished a Packet element, hand it to the
        # user's callback.
        if isinstance(elem, Packet):
            self.cb(elem)

    def characters(self, chars):
        """Accumulate character data. The buffer is reset at every
        element start and end and is not otherwise read in this class."""
        self.chars = self.chars + chars
265
266
def _create_parser(cb):
    """Internal helper: build a SAX parser whose content handler is a
    ParseXML wrapping the callback 'cb'."""
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(ParseXML(cb))

    # Keep external general entities off, so that a DTD listed in the
    # document is not fetched.
    sax_parser.setFeature(xml.sax.handler.feature_external_ges, False)

    return sax_parser
283
def parse_fh(fh, cb):
    """Parse the PDML read from the filehandle 'fh', calling the callback
    'cb' once for each Packet object."""
    _create_parser(cb).parse(fh)

    # The parser is deliberately not closed here: an explicit close()
    # after parse() was found to raise an error.
    # NOTE(review): presumably parse() already finalizes the parser,
    # which would make a second close() invalid -- confirm against the
    # xml.sax reader documentation.
295
def parse_string(text, cb):
    """Parse the PDML held in the string 'text', calling the callback
    'cb' once for each Packet object."""
    # Wrap the text in a file-like object and reuse the filehandle path.
    parse_fh(StringIO.StringIO(text), cb)
300
def _test():
    """Smoke test: parse the PDML file named by the first command-line
    argument and discard every Packet."""

    def test_cb(obj):
        # Packets are intentionally ignored; only parsing is exercised.
        pass

    filename = sys.argv[1]

    # Use a with-block so the file is closed even if parsing raises.
    with open(filename, "r") as fh:
        parse_fh(fh, test_cb)
310
# When run as a script, parse the PDML file named on the command line.
if __name__ == '__main__':
    _test()
313