"""
Routines for reading PDML produced from TShark.

Copyright (c) 2003, 2013 by Gilbert Ramirez <gram@alumni.rice.edu>

SPDX-License-Identifier: GPL-2.0-or-later
"""

import sys
import xml.sax
import xml.sax.handler
from xml.sax.saxutils import quoteattr

try:
    import cStringIO as StringIO  # Python 2
except ImportError:
    # Python 3 has no cStringIO; the io module offers StringIO and BytesIO.
    import io as StringIO


class CaptureFile:
    """Placeholder object for the top-level <pdml> element."""
    pass


class FoundItException(Exception):
    """Used internally for exiting a tree search"""
    pass


class PacketList:
    """Holds Packet objects, and has methods for finding
    items within it."""

    def __init__(self, children=None):
        """Create the list, optionally wrapping an existing list of
        children (the list is used as-is, not copied)."""
        if children is None:
            self.children = []
        else:
            self.children = children

    def __getitem__(self, index):
        """We act like a list."""
        return self.children[index]

    def __len__(self):
        return len(self.children)

    def item_exists(self, name):
        """Does an item with name 'name' exist in this
        PacketList? Returns True or False."""
        # _item_exists() walks the whole subtree and raises as soon as
        # a match is seen, which unwinds the recursion in one step.
        try:
            self._item_exists(name)
        except FoundItException:
            return True
        return False

    def _item_exists(self, name):
        """Depth-first search helper; raises FoundItException when an
        item named 'name' is found, returns None otherwise."""
        for child in self.children:
            if child.name == name:
                raise FoundItException
            child._item_exists(name)

    def get_items(self, name, items=None):
        """Return all items that match the name 'name'.
        They are returned in order of a depth-first-search.

        'items' is the accumulator used by the recursive calls; callers
        normally omit it and receive a new PacketList. When 'items' is
        supplied, matches are appended to it and None is returned."""
        top_level = items is None
        if top_level:
            items = []

        for child in self.children:
            if child.name == name:
                items.append(child)
            child.get_items(name, items)

        if top_level:
            return PacketList(items)
        return None

    def get_items_before(self, name, before_item, items=None):
        """Return all items that match the name 'name' that
        exist before the before_item. The before_item is an object.
        The results are returned in order of a depth-first-search.

        This function allows you to find fields from protocols that occur
        before other protocols. For example, if you have an HTTP
        protocol, you can find all tcp.dstport fields *before* that HTTP
        protocol. This helps analyze in the presence of tunneled protocols.

        'items' is an accumulator; when it is supplied, matches are
        appended to it, the before_item cut-off is not applied, and
        None is returned."""
        top_level = items is None
        if top_level:
            items = []

        for child in self.children:
            if top_level and child == before_item:
                break
            if child.name == name:
                items.append(child)
            # Call get_items because the 'before_item' applies
            # only to the top level search.
            child.get_items(name, items)

        if top_level:
            return PacketList(items)
        return None


class ProtoTreeItem(PacketList):
    """A node of the protocol tree, built from the attributes of one
    PDML element. Missing attributes are stored as empty strings."""

    # XML attributes copied onto the instance, in the order dump()
    # writes them.
    _ATTR_NAMES = ("name", "showname", "pos", "size", "value", "show", "hide")

    def __init__(self, xmlattrs):
        """'xmlattrs' is any mapping-like object with a get() method,
        such as the attributes object SAX passes to startElement()."""
        PacketList.__init__(self)

        self.name = xmlattrs.get("name", "")
        self.showname = xmlattrs.get("showname", "")
        self.pos = xmlattrs.get("pos", "")
        self.size = xmlattrs.get("size", "")
        self.value = xmlattrs.get("value", "")
        self.show = xmlattrs.get("show", "")
        self.hide = xmlattrs.get("hide", "")

    def add_child(self, child):
        self.children.append(child)

    def get_name(self):
        return self.name

    def get_showname(self):
        return self.showname

    def get_pos(self):
        return self.pos

    def get_size(self):
        return self.size

    def get_value(self):
        return self.value

    def get_show(self):
        return self.show

    def get_hide(self):
        return self.hide

    def dump(self, fh=sys.stdout):
        """Write every non-empty attribute to 'fh' as ' key="value"'.

        Only the attribute list is written (no tag name, no newline);
        the subclasses wrap it in the proper element."""
        # fh.write() is used rather than print so that the module works
        # on both Python 2 and Python 3.
        for attr in self._ATTR_NAMES:
            text = getattr(self, attr)
            if text:
                fh.write(" %s=%s" % (attr, quoteattr(text)))


class Packet(ProtoTreeItem, PacketList):
    """A <packet> element; its children are Protocol objects."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write the packet and its whole subtree to 'fh' as XML,
        indented by 'indent' spaces."""
        pad = " " * indent
        fh.write("%s<packet>\n" % (pad,))
        for child in self.children:
            child.dump(fh, indent + 1)
        # The closing tag lines up with the opening tag.
        fh.write("%s</packet>\n" % (pad,))


class Protocol(ProtoTreeItem):
    """A <proto> element."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write the protocol and its whole subtree to 'fh' as XML,
        indented by 'indent' spaces."""
        pad = " " * indent
        fh.write("%s<proto" % (pad,))
        ProtoTreeItem.dump(self, fh)
        fh.write(">\n")

        for child in self.children:
            child.dump(fh, indent + 1)
        fh.write("%s</proto>\n" % (pad,))


class Field(ProtoTreeItem):
    """A <field> element."""

    def dump(self, fh=sys.stdout, indent=0):
        """Write the field to 'fh' as XML, indented by 'indent' spaces.
        A field without children is written as a self-closing tag."""
        pad = " " * indent
        fh.write("%s<field" % (pad,))
        ProtoTreeItem.dump(self, fh)

        if self.children:
            fh.write(">\n")
            for child in self.children:
                child.dump(fh, indent + 1)
            fh.write("%s</field>\n" % (pad,))
        else:
            fh.write("/>\n")


class ParseXML(xml.sax.handler.ContentHandler):
    """SAX handler that builds one Packet tree at a time and hands
    each finished Packet to a callback."""

    ELEMENT_FILE = "pdml"
    ELEMENT_FRAME = "packet"
    ELEMENT_PROTOCOL = "proto"
    ELEMENT_FIELD = "field"

    def __init__(self, cb):
        """'cb' is called with each Packet once its end tag is seen."""
        xml.sax.handler.ContentHandler.__init__(self)
        self.cb = cb
        self.chars = ""
        self.element_stack = []

    def startElement(self, name, xmlattrs):
        """Create the object for element 'name' and push it on the
        stack. Exits the program (sys.exit) on an unknown element."""
        self.chars = ""

        if name == self.ELEMENT_FILE:
            # Eventually, we should check version number of pdml here
            elem = CaptureFile()

        elif name == self.ELEMENT_FRAME:
            elem = Packet(xmlattrs)

        elif name == self.ELEMENT_PROTOCOL:
            elem = Protocol(xmlattrs)

        elif name == self.ELEMENT_FIELD:
            elem = Field(xmlattrs)

        else:
            sys.exit("Unknown element: %s" % (name,))

        self.element_stack.append(elem)

    def endElement(self, name):
        """Pop the finished element, attach it to its parent, and call
        the callback if it is a Packet."""
        elem = self.element_stack.pop()

        # Add element as child to previous element as long
        # as there is more than 1 element in the stack. Only
        # one element in the stack means that the element in
        # the stack is the single CaptureFile element, and we don't
        # want to add this element to that, as we only want one
        # Packet element in memory at a time.
        if len(self.element_stack) > 1:
            parent_elem = self.element_stack[-1]
            parent_elem.add_child(elem)

        self.chars = ""

        # If we just finished a Packet element, hand it to the
        # user's callback.
        if isinstance(elem, Packet):
            self.cb(elem)

    def characters(self, chars):
        """Accumulate character data of the current element."""
        self.chars = self.chars + chars


def _create_parser(cb):
    """Internal function for setting up the SAX parser."""

    # Create a parser
    parser = xml.sax.make_parser()

    # Create the handler
    handler = ParseXML(cb)

    # Tell the parser to use our handler
    parser.setContentHandler(handler)

    # Don't fetch the DTD, in case it is listed
    parser.setFeature(xml.sax.handler.feature_external_ges, False)

    return parser


def parse_fh(fh, cb):
    """Parse a PDML file, given filehandle, and call the callback function (cb),
    once for each Packet object."""

    parser = _create_parser(cb)

    # Parse the file
    parser.parse(fh)

    # NOTE: parser.close() is deliberately not called here; the original
    # author found that it raised an error for an unknown reason.


def parse_string(text, cb):
    """Parse the PDML contained in a string (text or bytes) and call
    the callback function (cb) once for each Packet object."""
    # On Python 3 the fallback module is io, whose StringIO rejects
    # bytes; cStringIO (Python 2) has no BytesIO and takes str directly.
    if isinstance(text, bytes) and hasattr(StringIO, "BytesIO"):
        stream = StringIO.BytesIO(text)
    else:
        stream = StringIO.StringIO(text)
    parse_fh(stream, cb)


def _test():
    """Parse the PDML file named on the command line, discarding
    the packets."""

    def test_cb(obj):
        pass

    if len(sys.argv) < 2:
        sys.exit("usage: %s <file.pdml>" % (sys.argv[0],))

    # Binary mode lets the XML parser apply the document's own encoding
    # declaration; 'with' guarantees the file is closed.
    with open(sys.argv[1], "rb") as fh:
        parse_fh(fh, test_cb)


if __name__ == '__main__':
    _test()