"""Tiny expat-based XML reader that builds a tree of ``xmlobj`` nodes."""

import sys
import traceback
import xml.parsers.expat
from xml.sax.saxutils import escape as _escape


class xmlobj(object):
    """One XML element: its name, attributes, text and child elements.

    Access patterns:
      * ``node['attr']``   -> attribute value, or ``''`` when absent
      * ``node['_cdata']`` -> the element's character data
      * ``node.child`` / ``node.sub('child')`` -> list of children with
        that tag name, or ``None`` when there are none
      * ``for child in node`` -> every child in the order it was appended
    """

    def __init__(self):
        self.name = ''
        self.cdata = ''
        self.attrs = {}
        self.subs = []       # children, in the order they were appended
        self.subnames = {}   # tag name -> list of children with that name
        self.parent = None
        self.index = -1      # cursor used only by the explicit next() call

    def __getattr__(self, name):
        """Return the list of children named *name*, or None.

        Only reached when normal attribute lookup fails.
        """
        # Protocol probes such as __deepcopy__ or __getstate__ must see a
        # real AttributeError; answering None makes callers try to call None.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        # Go through __dict__ so an instance whose __init__ has not run
        # (no 'subnames' yet) cannot re-enter __getattr__ endlessly.
        return self.__dict__.get('subnames', {}).get(name, None)

    def sub(self, name):
        """Return the list of children named *name*, or None."""
        return self.subnames.get(name, None)

    def __getitem__(self, key):
        """Return attribute *key* ('' if absent); '_cdata' returns the text."""
        if key == '_cdata':
            return self.cdata
        return self.attrs.get(key, '')

    def __iter__(self):
        """Return a fresh iterator over the children.

        A new iterator per call means the element can be looped over any
        number of times, including in nested loops.
        """
        return iter(self.subs)

    def next(self):
        """Advance the element's own cursor and return the next child.

        Raises StopIteration once every child has been returned. The cursor
        is separate from ``for`` loops, which use ``__iter__``.
        """
        self.index += 1
        if self.index > len(self.subs) - 1:
            raise StopIteration("Looped through all sub elements.")
        return self.subs[self.index]

    __next__ = next  # lets the built-in next(node) work on Python 3

    def getxml(self, depth=0):
        """Serialise this element and its descendants to an XML string.

        *depth* is the nesting level; each level indents the tags with one
        tab. Attribute values and character data are escaped so the result
        stays well-formed when they contain ``&``, ``<``, ``>`` or ``"``.
        """
        tab = '\t' * depth
        pieces = ['%s<%s' % (tab, self.name)]
        for attr_name in self.attrs:
            # '%s' formatting keeps non-string attribute values working.
            value = _escape('%s' % (self.attrs[attr_name],), {'"': '&quot;'})
            pieces.append(' %s="%s"' % (attr_name, value))
        if self.cdata == '' and not self.subs:
            pieces.append('/>')
            return ''.join(pieces)
        pieces.append('>')
        if self.cdata != '':
            pieces.append('\n%s' % _escape(self.cdata))
        for sub in self.subs:
            pieces.append('\n' + sub.getxml(depth + 1))
        pieces.append('\n%s</%s>' % (tab, self.name))
        return ''.join(pieces)


class xmllib(object):
    """Parse an XML document into a tree of ``xmlobj`` nodes.

    *xml_file* is the XML text when ``type == 'string'`` (the default) or
    a path to a file when ``type == 'file'``. Any other *type* parses
    nothing. On an expat parse error the process exits via ``SystemExit``
    carrying the parser's message.
    """

    def __init__(self, xml_file, type='string'):
        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.CharacterDataHandler = self.h_data
        self.parser.StartElementHandler = self.h_se
        self.parser.EndElementHandler = self.h_ee
        self.root = xmlobj()       # synthetic holder above the document root
        self.element = self.root   # element currently being filled
        try:
            if type == 'file':
                # Binary mode: expat does its own decoding; 'with' makes
                # sure the handle is closed even when parsing fails.
                with open(xml_file, 'rb') as handle:
                    self.parser.ParseFile(handle)
            elif type == 'string':
                # isfinal=True so a truncated document is reported as an
                # error instead of being silently accepted.
                self.parser.Parse(xml_file, True)
        except xml.parsers.expat.ExpatError as error:
            # Still SystemExit, but with the reason and a non-zero status.
            sys.exit("Expat Parsing Error: %s" % error)

    def h_se(self, name, attrs):
        """Start-element handler: create a child and descend into it."""
        newelement = xmlobj()
        newelement.name = name
        newelement.attrs = attrs
        newelement.parent = self.element
        self.element.subs.append(newelement)
        # Second index: children grouped by tag name.
        self.element.subnames.setdefault(name, []).append(newelement)
        self.element = newelement

    def h_data(self, data):
        """Character-data handler: append text to the current element."""
        self.element.cdata += data

    def h_ee(self, name):
        """End-element handler: trim the text and climb back to the parent."""
        self.element.cdata = self.element.cdata.strip()
        self.element = self.element.parent

    def getroot(self):
        """Return the document's root element (first child of the holder)."""
        return self.root.subs[0]


def loadfile(file):
    """Parse the XML file at path *file* and return its root element."""
    # The path must be parsed as a file, not as XML text.
    xmlparser = xmllib(file, 'file')
    return xmlparser.getroot()


if __name__ == '__main__':
    message = """<message to='user@gmail.com' from='john@doe.com' id='abc123'>
 <subject>Google Federated!</subject>
 <body>I can totally send you messages from gmail now!</body>
</message>"""
    xmlparser = xmllib(message)
    # Named 'root' so the imported 'xml' package is not shadowed.
    root = xmlparser.getroot()
    for sub in root:
        print(sub.name)
        print(sub['_cdata'])
    print(root.sub('body')[0]['_cdata'])
    print(root['from'])