1"""
2SAX driver for the pyexpat C module.  This driver works with
3pyexpat.__version__ == '2.22'.
4"""
5
6version = "0.20"
7
8from xml.sax._exceptions import *
9from xml.sax.handler import feature_validation, feature_namespaces
10from xml.sax.handler import feature_namespace_prefixes
11from xml.sax.handler import feature_external_ges, feature_external_pes
12from xml.sax.handler import feature_string_interning
13from xml.sax.handler import property_xml_string, property_interning_dict
14
# xml.parsers.expat does not raise ImportError in Jython, so the
# platform is checked explicitly before the import is attempted.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# Import expat; a missing module and a module lacking ParserCreate
# are both reported as an unavailable reader.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
if not hasattr(expat, "ParserCreate"):
    raise SAXReaderNotAvailable("expat not supported", None)
28from xml.sax import xmlreader, saxutils, handler
29
# Module-level shortcuts for the two attribute container classes.
AttributesImpl, AttributesNSImpl = (xmlreader.AttributesImpl,
                                    xmlreader.AttributesNSImpl)
32
33# If we're using a sufficiently recent version of Python, we can use
34# weak references to avoid cycles between the parser and content
35# handler, otherwise we'll just have to pretend.
def _mkproxy(o):
    # Fallback used when weak references are unavailable: hand back
    # the object itself.
    return o

try:
    import _weakref
except ImportError:
    pass
else:
    # Weak references are available, so use a real weak proxy instead.
    from weakref import proxy as _mkproxy
    del _weakref
45
class _ClosedParser:
    """Placeholder that stands in for the expat parser after closing.

    ExpatParser.close() copies ErrorColumnNumber and ErrorLineNumber
    onto an instance of this class.
    """
48
49# --- ExpatLocator
50
class ExpatLocator(xmlreader.Locator):
    """Locator handed to the content handler by ExpatParser.

    The reader is held through _mkproxy() (a weak proxy when weak
    references are available) so that the parser and the content
    handler do not end up in a reference cycle.
    """

    def __init__(self, parser):
        # "parser" is the ExpatParser reader, not the raw expat object.
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        expat_parser = self._ref._parser
        if expat_parser is not None:
            return expat_parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        """Return the public id of the reader's current input source."""
        reader = self._ref
        if reader is not None:
            return reader._source.getPublicId()
        return None

    def getSystemId(self):
        """Return the system id of the reader's current input source."""
        reader = self._ref
        if reader is not None:
            return reader._source.getSystemId()
        return None
83
84
85# --- ExpatParser
86
87class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
88    """SAX driver for the pyexpat C module."""
89
90    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
91        xmlreader.IncrementalParser.__init__(self, bufsize)
92        self._source = xmlreader.InputSource()
93        self._parser = None
94        self._namespaces = namespaceHandling
95        self._lex_handler_prop = None
96        self._parsing = 0
97        self._entity_stack = []
98        self._external_ges = 1
99        self._interning = None
100
101    # XMLReader methods
102
103    def parse(self, source):
104        "Parse an XML document from a URL or an InputSource."
105        source = saxutils.prepare_input_source(source)
106
107        self._source = source
108        try:
109            self.reset()
110            self._cont_handler.setDocumentLocator(ExpatLocator(self))
111            xmlreader.IncrementalParser.parse(self, source)
112        except:
113            # bpo-30264: Close the source on error to not leak resources:
114            # xml.sax.parse() doesn't give access to the underlying parser
115            # to the caller
116            self._close_source()
117            raise
118
119    def prepareParser(self, source):
120        if source.getSystemId() is not None:
121            base = source.getSystemId()
122            if isinstance(base, unicode):
123                base = base.encode('utf-8')
124            self._parser.SetBase(base)
125
126    # Redefined setContentHandler to allow changing handlers during parsing
127
128    def setContentHandler(self, handler):
129        xmlreader.IncrementalParser.setContentHandler(self, handler)
130        if self._parsing:
131            self._reset_cont_handler()
132
133    def getFeature(self, name):
134        if name == feature_namespaces:
135            return self._namespaces
136        elif name == feature_string_interning:
137            return self._interning is not None
138        elif name in (feature_validation, feature_external_pes,
139                      feature_namespace_prefixes):
140            return 0
141        elif name == feature_external_ges:
142            return self._external_ges
143        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
144
145    def setFeature(self, name, state):
146        if self._parsing:
147            raise SAXNotSupportedException("Cannot set features while parsing")
148
149        if name == feature_namespaces:
150            self._namespaces = state
151        elif name == feature_external_ges:
152            self._external_ges = state
153        elif name == feature_string_interning:
154            if state:
155                if self._interning is None:
156                    self._interning = {}
157            else:
158                self._interning = None
159        elif name == feature_validation:
160            if state:
161                raise SAXNotSupportedException(
162                    "expat does not support validation")
163        elif name == feature_external_pes:
164            if state:
165                raise SAXNotSupportedException(
166                    "expat does not read external parameter entities")
167        elif name == feature_namespace_prefixes:
168            if state:
169                raise SAXNotSupportedException(
170                    "expat does not report namespace prefixes")
171        else:
172            raise SAXNotRecognizedException(
173                "Feature '%s' not recognized" % name)
174
175    def getProperty(self, name):
176        if name == handler.property_lexical_handler:
177            return self._lex_handler_prop
178        elif name == property_interning_dict:
179            return self._interning
180        elif name == property_xml_string:
181            if self._parser:
182                if hasattr(self._parser, "GetInputContext"):
183                    return self._parser.GetInputContext()
184                else:
185                    raise SAXNotRecognizedException(
186                        "This version of expat does not support getting"
187                        " the XML string")
188            else:
189                raise SAXNotSupportedException(
190                    "XML string cannot be returned when not parsing")
191        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
192
193    def setProperty(self, name, value):
194        if name == handler.property_lexical_handler:
195            self._lex_handler_prop = value
196            if self._parsing:
197                self._reset_lex_handler_prop()
198        elif name == property_interning_dict:
199            self._interning = value
200        elif name == property_xml_string:
201            raise SAXNotSupportedException("Property '%s' cannot be set" %
202                                           name)
203        else:
204            raise SAXNotRecognizedException("Property '%s' not recognized" %
205                                            name)
206
207    # IncrementalParser methods
208
209    def feed(self, data, isFinal = 0):
210        if not self._parsing:
211            self.reset()
212            self._parsing = 1
213            self._cont_handler.startDocument()
214
215        try:
216            # The isFinal parameter is internal to the expat reader.
217            # If it is set to true, expat will check validity of the entire
218            # document. When feeding chunks, they are not normally final -
219            # except when invoked from close.
220            self._parser.Parse(data, isFinal)
221        except expat.error, e:
222            exc = SAXParseException(expat.ErrorString(e.code), e, self)
223            # FIXME: when to invoke error()?
224            self._err_handler.fatalError(exc)
225
226    def _close_source(self):
227        source = self._source
228        try:
229            file = source.getCharacterStream()
230            if file is not None:
231                file.close()
232        finally:
233            file = source.getByteStream()
234            if file is not None:
235                file.close()
236
237    def close(self):
238        if (self._entity_stack or self._parser is None or
239            isinstance(self._parser, _ClosedParser)):
240            # If we are completing an external entity, do nothing here
241            return
242        try:
243            self.feed("", isFinal = 1)
244            self._cont_handler.endDocument()
245            self._parsing = 0
246            # break cycle created by expat handlers pointing to our methods
247            self._parser = None
248        finally:
249            self._parsing = 0
250            if self._parser is not None:
251                # Keep ErrorColumnNumber and ErrorLineNumber after closing.
252                parser = _ClosedParser()
253                parser.ErrorColumnNumber = self._parser.ErrorColumnNumber
254                parser.ErrorLineNumber = self._parser.ErrorLineNumber
255                self._parser = parser
256            self._close_source()
257
258    def _reset_cont_handler(self):
259        self._parser.ProcessingInstructionHandler = \
260                                    self._cont_handler.processingInstruction
261        self._parser.CharacterDataHandler = self._cont_handler.characters
262
263    def _reset_lex_handler_prop(self):
264        lex = self._lex_handler_prop
265        parser = self._parser
266        if lex is None:
267            parser.CommentHandler = None
268            parser.StartCdataSectionHandler = None
269            parser.EndCdataSectionHandler = None
270            parser.StartDoctypeDeclHandler = None
271            parser.EndDoctypeDeclHandler = None
272        else:
273            parser.CommentHandler = lex.comment
274            parser.StartCdataSectionHandler = lex.startCDATA
275            parser.EndCdataSectionHandler = lex.endCDATA
276            parser.StartDoctypeDeclHandler = self.start_doctype_decl
277            parser.EndDoctypeDeclHandler = lex.endDTD
278
279    def reset(self):
280        if self._namespaces:
281            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
282                                              intern=self._interning)
283            self._parser.namespace_prefixes = 1
284            self._parser.StartElementHandler = self.start_element_ns
285            self._parser.EndElementHandler = self.end_element_ns
286        else:
287            self._parser = expat.ParserCreate(self._source.getEncoding(),
288                                              intern = self._interning)
289            self._parser.StartElementHandler = self.start_element
290            self._parser.EndElementHandler = self.end_element
291
292        self._reset_cont_handler()
293        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
294        self._parser.NotationDeclHandler = self.notation_decl
295        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
296        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
297
298        self._decl_handler_prop = None
299        if self._lex_handler_prop:
300            self._reset_lex_handler_prop()
301#         self._parser.DefaultHandler =
302#         self._parser.DefaultHandlerExpand =
303#         self._parser.NotStandaloneHandler =
304        self._parser.ExternalEntityRefHandler = self.external_entity_ref
305        try:
306            self._parser.SkippedEntityHandler = self.skipped_entity_handler
307        except AttributeError:
308            # This pyexpat does not support SkippedEntity
309            pass
310        self._parser.SetParamEntityParsing(
311            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
312
313        self._parsing = 0
314        self._entity_stack = []
315
316    # Locator methods
317
318    def getColumnNumber(self):
319        if self._parser is None:
320            return None
321        return self._parser.ErrorColumnNumber
322
323    def getLineNumber(self):
324        if self._parser is None:
325            return 1
326        return self._parser.ErrorLineNumber
327
328    def getPublicId(self):
329        return self._source.getPublicId()
330
331    def getSystemId(self):
332        return self._source.getSystemId()
333
334    # event handlers
335    def start_element(self, name, attrs):
336        self._cont_handler.startElement(name, AttributesImpl(attrs))
337
338    def end_element(self, name):
339        self._cont_handler.endElement(name)
340
341    def start_element_ns(self, name, attrs):
342        pair = name.split()
343        if len(pair) == 1:
344            # no namespace
345            pair = (None, name)
346        elif len(pair) == 3:
347            pair = pair[0], pair[1]
348        else:
349            # default namespace
350            pair = tuple(pair)
351
352        newattrs = {}
353        qnames = {}
354        for (aname, value) in attrs.items():
355            parts = aname.split()
356            length = len(parts)
357            if length == 1:
358                # no namespace
359                qname = aname
360                apair = (None, aname)
361            elif length == 3:
362                qname = "%s:%s" % (parts[2], parts[1])
363                apair = parts[0], parts[1]
364            else:
365                # default namespace
366                qname = parts[1]
367                apair = tuple(parts)
368
369            newattrs[apair] = value
370            qnames[apair] = qname
371
372        self._cont_handler.startElementNS(pair, None,
373                                          AttributesNSImpl(newattrs, qnames))
374
375    def end_element_ns(self, name):
376        pair = name.split()
377        if len(pair) == 1:
378            pair = (None, name)
379        elif len(pair) == 3:
380            pair = pair[0], pair[1]
381        else:
382            pair = tuple(pair)
383
384        self._cont_handler.endElementNS(pair, None)
385
386    # this is not used (call directly to ContentHandler)
387    def processing_instruction(self, target, data):
388        self._cont_handler.processingInstruction(target, data)
389
390    # this is not used (call directly to ContentHandler)
391    def character_data(self, data):
392        self._cont_handler.characters(data)
393
394    def start_namespace_decl(self, prefix, uri):
395        self._cont_handler.startPrefixMapping(prefix, uri)
396
397    def end_namespace_decl(self, prefix):
398        self._cont_handler.endPrefixMapping(prefix)
399
400    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
401        self._lex_handler_prop.startDTD(name, pubid, sysid)
402
403    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
404        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
405
406    def notation_decl(self, name, base, sysid, pubid):
407        self._dtd_handler.notationDecl(name, pubid, sysid)
408
409    def external_entity_ref(self, context, base, sysid, pubid):
410        if not self._external_ges:
411            return 1
412
413        source = self._ent_handler.resolveEntity(pubid, sysid)
414        source = saxutils.prepare_input_source(source,
415                                               self._source.getSystemId() or
416                                               "")
417
418        self._entity_stack.append((self._parser, self._source))
419        self._parser = self._parser.ExternalEntityParserCreate(context)
420        self._source = source
421
422        try:
423            xmlreader.IncrementalParser.parse(self, source)
424        except:
425            return 0  # FIXME: save error info here?
426
427        (self._parser, self._source) = self._entity_stack[-1]
428        del self._entity_stack[-1]
429        return 1
430
431    def skipped_entity_handler(self, name, is_pe):
432        if is_pe:
433            # The SAX spec requires to report skipped PEs with a '%'
434            name = '%'+name
435        self._cont_handler.skippedEntity(name)
436
437# ---
438
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments."""
    parser = ExpatParser(*args, **kwargs)
    return parser
441
442# ---
443
if __name__ == "__main__":
    # Demo: parse a document fetched over HTTP and hand its events to
    # an XMLGenerator.
    import xml.sax.saxutils
    demo_url = "http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml"
    demo_parser = create_parser()
    demo_parser.setContentHandler(xml.sax.saxutils.XMLGenerator())
    demo_parser.setErrorHandler(xml.sax.ErrorHandler())
    demo_parser.parse(demo_url)
450