##############################################################################
#
# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Generic Expat-based XML parser base class.

This creates a parser with namespace processing enabled.
"""
import logging

try:
    _text_type = unicode  # noqa: F821 -- Python 2 text type
except NameError:
    _text_type = str  # Python 3: ``str`` is the text type


class XMLParser(object):
    """Thin wrapper around an Expat parser object.

    Subclasses receive parse events by defining methods named after the
    entries of ``handler_names`` (for example ``StartElementHandler``).
    Every such method found on the instance is installed on the underlying
    Expat parser by ``__init__``.
    """

    # Subclasses may set this to a true value to ask Expat to deliver
    # attributes as a flat list instead of a dictionary.
    ordered_attributes = 0

    # Names of the Expat handler attributes that ``__init__`` looks up on
    # ``self`` and, when present, installs on the parser.
    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler"
        ]

    def __init__(self, encoding=None):
        """Create the Expat parser and hook up the handler methods.

        ``encoding`` is passed through unchanged to ``createParser``.
        """
        self.parser = p = self.createParser(encoding)
        if self.ordered_attributes:
            try:
                self.parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                # The parser object refused the attribute: log it and fall
                # back to the default attribute format.
                logging.warning("TAL.XMLParser: Can't set ordered_attributes")
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is not None:
                try:
                    setattr(p, name, method)
                except AttributeError:
                    logging.error("TAL.XMLParser: Can't set "
                                  "expat handler %s", name)

    def createParser(self, encoding=None):
        """Return a new Expat parser using a space as namespace separator.

        As a side effect the module-level name ``XMLParseError`` is bound
        to ``expat.ExpatError``; it does not exist before the first call.
        """
        global XMLParseError
        from xml.parsers import expat
        XMLParseError = expat.ExpatError
        return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        """Parse the XML document stored in the file named ``filename``."""
        # Binary mode: Expat does its own decoding and, on Python 3,
        # ParseFile() requires read() to return bytes.  The ``with`` block
        # closes the file even when parsing raises.
        with open(filename, 'rb') as stream:
            self.parseStream(stream)

    def parseString(self, s):
        """Parse ``s`` as one complete XML document."""
        if isinstance(s, _text_type):
            # Expat cannot deal with unicode strings, only with
            # encoded ones.  Also, its range of encodings is rather
            # limited, UTF-8 is the safest bet here.
            s = s.encode('utf-8')
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        """Fetch ``url`` and parse the response as an XML document."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        stream = urlopen(url)
        try:
            self.parseStream(stream)
        finally:
            stream.close()

    def parseStream(self, stream):
        """Parse a document read from the file-like object ``stream``."""
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        """Feed the chunk ``s`` to the parser.

        ``end`` is handed to Expat as the "is final" flag; pass a true
        value with the last chunk.
        """
        self.parser.Parse(s, end)

    def getpos(self):
        """Return the current ``(line, column)`` position of the parser."""
        # Apparently ErrorLineNumber and ErrorColumnNumber contain the current
        # position even when there was no error. This contradicts the official
        # documentation[1], but expat.h[2] contains the following definition:
        #
        #   /* For backwards compatibility with previous versions. */
        #   #define XML_GetErrorLineNumber   XML_GetCurrentLineNumber
        #
        # [1] http://python.org/doc/current/lib/xmlparser-objects.html
        # [2] http://cvs.sourceforge.net/viewcvs.py/expat/expat/lib/expat.h
        return (self.parser.ErrorLineNumber, self.parser.ErrorColumnNumber)