1""" 2SAX driver for the Java SAX parsers. Can only be used in Jython. 3 4$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $ 5""" 6 7# --- Initialization 8 9version = "0.10" 10revision = "$Revision: 1.5 $" 11 12import string 13from xml.sax import xmlreader, saxutils 14from xml.sax.handler import feature_namespaces, feature_namespace_prefixes 15from xml.sax import _exceptions 16 17# we only work in jython 18import sys 19if sys.platform[:4] != "java": 20 raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None) 21del sys 22 23# get the necessary Java SAX classes 24try: 25 from org.python.core import FilelikeInputStream 26 from org.xml.sax.helpers import XMLReaderFactory 27 from org.xml import sax as javasax 28 from org.xml.sax.ext import LexicalHandler 29except ImportError: 30 raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None) 31 32# get some JAXP stuff 33try: 34 from javax.xml.parsers import SAXParserFactory, ParserConfigurationException 35 factory = SAXParserFactory.newInstance() 36 jaxp = 1 37except ImportError: 38 jaxp = 0 39 40from java.lang import String 41 42 43def _wrap_sax_exception(e): 44 return _exceptions.SAXParseException(e.message, 45 e.exception, 46 SimpleLocator(e.columnNumber, 47 e.lineNumber, 48 e.publicId, 49 e.systemId)) 50 51class JyErrorHandlerWrapper(javasax.ErrorHandler): 52 def __init__(self, err_handler): 53 self._err_handler = err_handler 54 55 def error(self, exc): 56 self._err_handler.error(_wrap_sax_exception(exc)) 57 58 def fatalError(self, exc): 59 self._err_handler.fatalError(_wrap_sax_exception(exc)) 60 61 def warning(self, exc): 62 self._err_handler.warning(_wrap_sax_exception(exc)) 63 64class JyInputSourceWrapper(javasax.InputSource): 65 def __init__(self, source): 66 if isinstance(source, basestring): 67 javasax.InputSource.__init__(self, source) 68 elif hasattr(source, "read"):#file like object 69 f = source 70 javasax.InputSource.__init__(self, FilelikeInputStream(f)) 71 if hasattr(f, "name"): 72 self.setSystemId(f.name) 73 else:#xml.sax.xmlreader.InputSource object 74 #Use byte stream constructor if possible so that Xerces won't attempt to open 75 #the url at systemId unless it's really there 76 if source.getByteStream(): 77 javasax.InputSource.__init__(self, 78 FilelikeInputStream(source.getByteStream())) 79 else: 80 javasax.InputSource.__init__(self) 81 if source.getSystemId(): 82 self.setSystemId(source.getSystemId()) 83 self.setPublicId(source.getPublicId()) 84 self.setEncoding(source.getEncoding()) 85 86class JyEntityResolverWrapper(javasax.EntityResolver): 87 def __init__(self, entityResolver): 88 self._resolver = entityResolver 89 90 def resolveEntity(self, pubId, sysId): 91 return JyInputSourceWrapper(self._resolver.resolveEntity(pubId, sysId)) 92 93class JyDTDHandlerWrapper(javasax.DTDHandler): 94 def __init__(self, dtdHandler): 95 self._handler = dtdHandler 96 97 def notationDecl(self, name, publicId, systemId): 98 self._handler.notationDecl(name, publicId, systemId) 99 100 def unparsedEntityDecl(self, name, publicId, systemId, notationName): 101 self._handler.unparsedEntityDecl(name, publicId, systemId, notationName) 102 103class SimpleLocator(xmlreader.Locator): 104 def __init__(self, colNum, lineNum, pubId, sysId): 105 self.colNum = colNum 106 self.lineNum = lineNum 107 self.pubId = pubId 108 self.sysId = sysId 109 110 def getColumnNumber(self): 111 return self.colNum 112 113 def getLineNumber(self): 114 return self.lineNum 115 116 def getPublicId(self): 117 return self.pubId 118 119 def getSystemId(self): 120 return self.sysId 121 122# --- JavaSAXParser 123class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler, LexicalHandler): 124 "SAX driver for the Java SAX parsers." 125 126 def __init__(self, jdriver = None): 127 xmlreader.XMLReader.__init__(self) 128 self._parser = create_java_parser(jdriver) 129 self._parser.setFeature(feature_namespaces, 0) 130 self._parser.setFeature(feature_namespace_prefixes, 0) 131 self._parser.setContentHandler(self) 132 self._nsattrs = AttributesNSImpl() 133 self._attrs = AttributesImpl() 134 self.setEntityResolver(self.getEntityResolver()) 135 self.setErrorHandler(self.getErrorHandler()) 136 self.setDTDHandler(self.getDTDHandler()) 137 try: 138 self._parser.setProperty("http://xml.org/sax/properties/lexical-handler", self) 139 except Exception, x: 140 pass 141 142 # XMLReader methods 143 144 def parse(self, source): 145 "Parse an XML document from a URL or an InputSource." 146 self._parser.parse(JyInputSourceWrapper(source)) 147 148 def getFeature(self, name): 149 return self._parser.getFeature(name) 150 151 def setFeature(self, name, state): 152 self._parser.setFeature(name, state) 153 154 def getProperty(self, name): 155 return self._parser.getProperty(name) 156 157 def setProperty(self, name, value): 158 self._parser.setProperty(name, value) 159 160 def setEntityResolver(self, resolver): 161 self._parser.entityResolver = JyEntityResolverWrapper(resolver) 162 xmlreader.XMLReader.setEntityResolver(self, resolver) 163 164 def setErrorHandler(self, err_handler): 165 self._parser.errorHandler = JyErrorHandlerWrapper(err_handler) 166 xmlreader.XMLReader.setErrorHandler(self, err_handler) 167 168 def setDTDHandler(self, dtd_handler): 169 self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler)) 170 xmlreader.XMLReader.setDTDHandler(self, dtd_handler) 171 172 # ContentHandler methods 173 def setDocumentLocator(self, locator): 174 self._cont_handler.setDocumentLocator(locator) 175 176 def startDocument(self): 177 self._cont_handler.startDocument() 178 self._namespaces = self._parser.getFeature(feature_namespaces) 179 180 def startElement(self, uri, lname, qname, attrs): 181 if self._namespaces: 182 self._nsattrs._attrs = attrs 183 self._cont_handler.startElementNS((uri or None, lname), qname, 184 self._nsattrs) 185 else: 186 self._attrs._attrs = attrs 187 self._cont_handler.startElement(qname, self._attrs) 188 189 def startPrefixMapping(self, prefix, uri): 190 self._cont_handler.startPrefixMapping(prefix, uri) 191 192 def characters(self, char, start, len): 193 self._cont_handler.characters(unicode(String(char, start, len))) 194 195 def ignorableWhitespace(self, char, start, len): 196 self._cont_handler.ignorableWhitespace(unicode(String(char, start, 197 len))) 198 199 def endElement(self, uri, lname, qname): 200 if self._namespaces: 201 self._cont_handler.endElementNS((uri or None, lname), qname) 202 else: 203 self._cont_handler.endElement(qname) 204 205 def endPrefixMapping(self, prefix): 206 self._cont_handler.endPrefixMapping(prefix) 207 208 def endDocument(self): 209 self._cont_handler.endDocument() 210 211 def processingInstruction(self, target, data): 212 self._cont_handler.processingInstruction(target, data) 213 214 # Lexical handler methods 215 def comment(self, char, start, len): 216 try: 217 # Need to wrap this in a try..except in case the parser does not support lexical events 218 self._cont_handler.comment(unicode(String(char, start, len))) 219 except: 220 pass 221 222 def startCDATA(self): 223 pass # TODO 224 225 def endCDATA(self): 226 pass # TODO 227 228 def startDTD(self, name, publicId, systemId): 229 pass # TODO 230 231 def endDTD(self): 232 pass # TODO 233 234 def startEntity(self, name): 235 pass # TODO 236 237 def endEntity(self, name): 238 pass # TODO 239 240def _fixTuple(nsTuple, frm, to): 241 if isinstance(nsTuple, tuple) and len(nsTuple) == 2: 242 nsUri, localName = nsTuple 243 if nsUri == frm: 244 nsUri = to 245 return (nsUri, localName) 246 return nsTuple 247 248def _makeJavaNsTuple(nsTuple): 249 return _fixTuple(nsTuple, None, '') 250 251def _makePythonNsTuple(nsTuple): 252 return _fixTuple(nsTuple, '', None) 253 254class AttributesImpl: 255 256 def __init__(self, attrs = None): 257 self._attrs = attrs 258 259 def getLength(self): 260 return self._attrs.getLength() 261 262 def getType(self, name): 263 return self._attrs.getType(_makeJavaNsTuple(name)) 264 265 def getValue(self, name): 266 value = self._attrs.getValue(_makeJavaNsTuple(name)) 267 if value == None: 268 raise KeyError(name) 269 return value 270 271 def getNames(self): 272 return [_makePythonNsTuple(self._attrs.getQName(index)) for index in range(len(self))] 273 274 def getQNames(self): 275 return [self._attrs.getQName(index) for index in range(len(self))] 276 277 def getValueByQName(self, qname): 278 idx = self._attrs.getIndex(qname) 279 if idx == -1: 280 raise KeyError, qname 281 return self._attrs.getValue(idx) 282 283 def getNameByQName(self, qname): 284 idx = self._attrs.getIndex(qname) 285 if idx == -1: 286 raise KeyError, qname 287 return qname 288 289 def getQNameByName(self, name): 290 idx = self._attrs.getIndex(_makeJavaNsTuple(name)) 291 if idx == -1: 292 raise KeyError, name 293 return name 294 295 def __len__(self): 296 return self._attrs.getLength() 297 298 def __getitem__(self, name): 299 return self.getValue(name) 300 301 def keys(self): 302 return self.getNames() 303 304 def copy(self): 305 return self.__class__(self._attrs) 306 307 def items(self): 308 return [(name, self[name]) for name in self.getNames()] 309 310 def values(self): 311 return map(self.getValue, self.getNames()) 312 313 def get(self, name, alt=None): 314 try: 315 return self.getValue(name) 316 except KeyError: 317 return alt 318 319 def has_key(self, name): 320 try: 321 self.getValue(name) 322 return True 323 except KeyError: 324 return False 325 326# --- AttributesNSImpl 327 328class AttributesNSImpl(AttributesImpl): 329 330 def __init__(self, attrs=None): 331 AttributesImpl.__init__(self, attrs) 332 333 def getType(self, name): 334 name = _makeJavaNsTuple(name) 335 return self._attrs.getType(name[0], name[1]) 336 337 def getValue(self, name): 338 jname = _makeJavaNsTuple(name) 339 value = self._attrs.getValue(jname[0], jname[1]) 340 if value == None: 341 raise KeyError(name) 342 return value 343 344 def getNames(self): 345 names = [] 346 for idx in range(len(self)): 347 names.append(_makePythonNsTuple( (self._attrs.getURI(idx), self._attrs.getLocalName(idx)) )) 348 return names 349 350 def getNameByQName(self, qname): 351 idx = self._attrs.getIndex(qname) 352 if idx == -1: 353 raise KeyError, qname 354 return _makePythonNsTuple( (self._attrs.getURI(idx), self._attrs.getLocalName(idx)) ) 355 356 def getQNameByName(self, name): 357 name = _makeJavaNsTuple(name) 358 idx = self._attrs.getIndex(name[0], name[1]) 359 if idx == -1: 360 raise KeyError, name 361 return self._attrs.getQName(idx) 362 363 def getQNames(self): 364 return [self._attrs.getQName(idx) for idx in range(len(self))] 365 366# --- 367 368def create_java_parser(jdriver = None): 369 try: 370 if jdriver: 371 return XMLReaderFactory.createXMLReader(jdriver) 372 elif jaxp: 373 return factory.newSAXParser().getXMLReader() 374 else: 375 return XMLReaderFactory.createXMLReader() 376 except ParserConfigurationException, e: 377 raise _exceptions.SAXReaderNotAvailable(e.getMessage()) 378 except javasax.SAXException, e: 379 raise _exceptions.SAXReaderNotAvailable(e.getMessage()) 380 381def create_parser(jdriver = None): 382 return JavaSAXParser(jdriver) 383