1import xml.parsers.expat, sys,traceback
2
class xmlobj(object):
	"""One XML element: tag name, attributes, character data and children.

	Children are reachable three ways: by iterating the element (document
	order), through ``sub(name)``, or as an attribute named after the child
	tag (``element.body``).  The last two return the list of children with
	that tag, or None when there is none.  Attributes are read with
	``element['attr']`` and the text with ``element['_cdata']``.
	"""

	def __init__(self):
		self.name = ''  # tag name
		self.cdata = ''  # character data of this element
		self.attrs = {}  # attribute name -> value
		self.subs = []  # child elements, in document order
		self.subnames = {}  # child tag name -> list of children with that tag
		self.parent = None  # enclosing element, None when detached
		self.index = -1  # cursor of the legacy next() protocol

	def __getattr__(self, name):
		"""Return the children tagged *name*, or None if there are none.

		Only called when normal attribute lookup fails.

		Raises:
			AttributeError: for dunder names, so protocol probes such as
				``hasattr(obj, '__deepcopy__')`` get an honest answer, and
				for any name while ``subnames`` is not set yet (instance
				created without ``__init__``), which previously recursed
				without bound.
		"""
		if name[:2] == '__' and name[-2:] == '__':
			raise AttributeError(name)
		try:
			# Go through __dict__ so a missing 'subnames' cannot re-enter
			# __getattr__.
			subnames = self.__dict__['subnames']
		except KeyError:
			raise AttributeError(name)
		return subnames.get(name, None)

	def sub(self, name):
		"""Return the list of children tagged *name*, or None."""
		return self.subnames.get(name, None)

	def __getitem__(self, key):
		"""Return attribute *key* ('' when absent); '_cdata' yields the text."""
		if key == '_cdata':
			return self.cdata
		return self.attrs.get(key, '')

	def __iter__(self):
		"""Return a fresh iterator over the children.

		A new iterator per call keeps repeated and nested loops independent
		of each other and of the legacy ``next()`` cursor.
		"""
		return iter(self.subs)

	def next(self):
		"""Advance the legacy cursor and return the child it lands on.

		Kept for callers that drive the element manually; the cursor
		(``self.index``) is never reset.

		Raises:
			StopIteration: once the cursor has moved past the last child.
		"""
		self.index += 1
		if self.index > len(self.subs) - 1:
			raise StopIteration("Looped through all sub elements.")
		return self.subs[self.index]

	__next__ = next  # same cursor under the Python 3 protocol name

	def getxml(self, depth = 0):
		"""Serialise this element and its descendants to an XML string.

		Args:
			depth: nesting level; each level indents tags by one tab.

		Returns:
			The markup as a string.  An element with neither text nor
			children is written as a self-closing tag.  Text is placed on
			its own line without indentation.  Attribute values and text
			are entity-escaped so the output stays well-formed.
		"""
		# Local import: the module-level imports do not provide saxutils.
		from xml.sax.saxutils import escape

		tab = '\t' * depth
		pieces = ['%s<%s' % (tab, self.name)]
		for attr_name in self.attrs:
			# '%s' coerces non-string values the same way the markup did
			# before; the double quote is escaped because it delimits the
			# value.
			value = escape('%s' % (self.attrs[attr_name],), {'"': '&quot;'})
			pieces.append(' %s="%s"' % (attr_name, value))
		if not self.cdata and not self.subs:
			pieces.append('/>')
			return ''.join(pieces)
		pieces.append('>')
		if self.cdata:
			pieces.append('\n%s' % escape('%s' % (self.cdata,)))
		for sub in self.subs:
			pieces.append('\n' + sub.getxml(depth + 1))
		pieces.append('\n%s</%s>' % (tab, self.name))
		return ''.join(pieces)
50
class xmllib(object):
	"""Parse XML with expat into a tree of xmlobj elements.

	The constructor parses immediately; ``getroot()`` then returns the
	document element.
	"""

	def __init__(self, xml_file, type='string'):
		"""Parse *xml_file* and build the element tree.

		Args:
			xml_file: the XML text itself when *type* is 'string', or the
				path of a file to read when *type* is 'file'.
			type: 'string' (default) or 'file'.  Any other value parses
				nothing and leaves the tree empty.

		Raises:
			SystemExit: on an expat parse error.  The message goes to
				stderr and the exit status is non-zero.
		"""
		self.parser = xml.parsers.expat.ParserCreate()
		self.parser.CharacterDataHandler = self.h_data
		self.parser.StartElementHandler = self.h_se
		self.parser.EndElementHandler = self.h_ee
		self.root = xmlobj()  # synthetic holder; the document element is its only child
		self.element = self.root  # element currently being filled
		try:
			if type == 'file':
				# Binary mode lets expat do the decoding; the with-block
				# closes the handle even when parsing fails.
				with open(xml_file, 'rb') as source:
					self.parser.ParseFile(source)
			elif type == 'string':
				# isfinal=True marks the end of input so a truncated
				# document is reported instead of silently accepted.
				self.parser.Parse(xml_file, True)
		except xml.parsers.expat.ExpatError as err:
			# sys.exit(<str>) prints to stderr and exits with status 1.
			sys.exit("Expat Parsing Error: %s" % err)

	def h_se(self, name, attrs):
		"""Start-tag handler: create a child of the current element and descend into it."""
		newelement = xmlobj()
		newelement.name = name
		newelement.attrs = attrs
		newelement.parent = self.element
		self.element.subs.append(newelement)
		# Second index: children grouped by tag name.
		self.element.subnames.setdefault(name, []).append(newelement)
		self.element = newelement  # the new element becomes the current one

	def h_data(self, data):
		"""Character-data handler: append *data* to the current element's text."""
		self.element.cdata += data

	def h_ee(self, name):
		"""End-tag handler: strip the collected text and step back to the parent."""
		self.element.cdata = self.element.cdata.strip()
		self.element = self.element.parent

	def getroot(self):
		"""Return the document element.

		Raises:
			IndexError: if nothing was parsed, so the holder has no child.
		"""
		return self.root.subs[0]
94
def loadfile(file):
	"""Parse the XML file at path *file* and return its document element.

	The path is passed with type='file' so the file's contents are parsed;
	with the default type the path string itself was treated as XML text.
	"""
	xmlparser = xmllib(file, type='file')
	return xmlparser.getroot()
98
if __name__ == '__main__':
	# Demo: parse a small message and show the three access styles.
	message = """<message to='user@gmail.com' from='john@doe.com' id='abc123'>
	<subject>Google Federated!</subject>
	<body>I can totally send you messages from gmail now!</body>
</message>"""
	xmlparser = xmllib(message)
	# Named 'root' rather than 'xml' so the imported xml package, which
	# xmllib looks up at call time, is not shadowed at module level.
	root = xmlparser.getroot()
	for sub in root:
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print(sub.name)
		print(sub['_cdata'])
	print(root.sub('body')[0]['_cdata'])
	print(root['from'])
111