1# SAX-like interfaces
2
class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError):
    """
    An XMLSyntaxError that additionally inherits from AssertionError for
    ElementTree / backwards compatibility reasons.

    Because of the double inheritance, it can be caught either as an
    XMLSyntaxError or as an AssertionError.

    This class may get replaced by a plain XMLSyntaxError in a future version.
    """
10
11
# Bit flags describing which SAX events a parser target wants to receive.
# They are combined into ``_SaxParserTarget._sax_event_filter`` and checked
# when the SAX callbacks get connected to the target.
ctypedef enum _SaxParserEvents:
    SAX_EVENT_START    = 1 << 0
    SAX_EVENT_END      = 1 << 1
    SAX_EVENT_DATA     = 1 << 2
    SAX_EVENT_DOCTYPE  = 1 << 3
    SAX_EVENT_PI       = 1 << 4
    SAX_EVENT_COMMENT  = 1 << 5
    SAX_EVENT_START_NS = 1 << 6
    SAX_EVENT_END_NS   = 1 << 7
21
# Bit flags selecting which parse events are collected for the events
# iterator.  A mask of these flags is built from the user visible event
# names ('start', 'end', 'start-ns', 'end-ns', 'comment', 'pi') by
# _buildParseEventFilter().
ctypedef enum _ParseEventFilter:
    PARSE_EVENT_FILTER_START     = 1 << 0
    PARSE_EVENT_FILTER_END       = 1 << 1
    PARSE_EVENT_FILTER_START_NS  = 1 << 2
    PARSE_EVENT_FILTER_END_NS    = 1 << 3
    PARSE_EVENT_FILTER_COMMENT   = 1 << 4
    PARSE_EVENT_FILTER_PI        = 1 << 5
29
30
cdef int _buildParseEventFilter(events) except -1:
    """Translate an iterable of event names into a _ParseEventFilter bit mask.

    Accepted names are 'start', 'end', 'start-ns', 'end-ns', 'comment' and
    'pi'.  Any other name raises a ValueError.  An empty iterable yields 0.
    """
    cdef int mask = 0
    cdef int flag
    for event in events:
        # pick the flag for this name first, then merge it into the mask
        if event == 'start':
            flag = PARSE_EVENT_FILTER_START
        elif event == 'end':
            flag = PARSE_EVENT_FILTER_END
        elif event == 'start-ns':
            flag = PARSE_EVENT_FILTER_START_NS
        elif event == 'end-ns':
            flag = PARSE_EVENT_FILTER_END_NS
        elif event == 'comment':
            flag = PARSE_EVENT_FILTER_COMMENT
        elif event == 'pi':
            flag = PARSE_EVENT_FILTER_PI
        else:
            raise ValueError, f"invalid event name '{event}'"
        mask |= flag
    return mask
50
51
cdef class _SaxParserTarget:
    # Base class for parser targets that are driven by SAX events.
    #
    # All handlers below are no-op defaults.  Subclasses override the ones
    # they need and announce them by setting the matching SAX_EVENT_* bits
    # in ``_sax_event_filter``.

    # bit mask of SAX_EVENT_* flags; 0 means that no event is requested
    cdef int _sax_event_filter
    def __cinit__(self):
        self._sax_event_filter = 0

    # element start; default returns None
    cdef _handleSaxStart(self, tag, attrib, nsmap):
        return None
    # element end; default returns None
    cdef _handleSaxEnd(self, tag):
        return None
    # character data; returns 0, -1 signals an exception
    cdef int _handleSaxData(self, data) except -1:
        return 0
    # doctype declaration; returns 0, -1 signals an exception
    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        return 0
    # processing instruction; default returns None
    cdef _handleSaxPi(self, target, data):
        return None
    # comment; default returns None
    cdef _handleSaxComment(self, comment):
        return None
    # namespace declaration start; default returns None
    cdef _handleSaxStartNs(self, prefix, uri):
        return None
    # namespace declaration end; default returns None
    cdef _handleSaxEndNs(self, prefix):
        return None
73
74
75#@cython.final
@cython.internal
@cython.no_gc_clear  # Required because parent class uses it - Cython bug.
cdef class _SaxParserContext(_ParserContext):
    u"""This class maps SAX2 events to parser target events.

    Depending on its configuration, the context either redirects the
    libxml2 SAX callbacks to a ``_SaxParserTarget`` (``_connectTarget()``)
    or wraps the original callbacks in order to collect parse events for
    the events iterator (``_connectEvents()``).
    """
    # parser target that receives the events, None when only collecting events
    cdef _SaxParserTarget _target
    # owning parser; reset to None in startDocument()
    cdef _BaseParser _parser
    # SAX callbacks as found in the parser context before they were replaced
    cdef xmlparser.startElementNsSAX2Func _origSaxStart
    cdef xmlparser.endElementNsSAX2Func   _origSaxEnd
    cdef xmlparser.startElementSAXFunc    _origSaxStartNoNs
    cdef xmlparser.endElementSAXFunc      _origSaxEndNoNs
    cdef xmlparser.charactersSAXFunc      _origSaxData
    cdef xmlparser.cdataBlockSAXFunc      _origSaxCData
    cdef xmlparser.internalSubsetSAXFunc  _origSaxDoctype
    cdef xmlparser.commentSAXFunc         _origSaxComment
    cdef xmlparser.processingInstructionSAXFunc _origSaxPI
    cdef xmlparser.startDocumentSAXFunc   _origSaxStartDocument

    # for event collecting
    cdef int _event_filter      # mask of PARSE_EVENT_FILTER_* flags
    cdef list _ns_stack         # per open element: declared namespaces or None
    cdef list _node_stack       # elements that still await their 'end' event
    cdef _ParseEventsIterator events_iterator

    # for iterparse
    cdef _Element  _root
    cdef _MultiTagMatcher _matcher

    def __cinit__(self, _BaseParser parser):
        self._ns_stack = []
        self._node_stack = []
        self._parser = parser
        self.events_iterator = _ParseEventsIterator()

    cdef void _setSaxParserTarget(self, _SaxParserTarget target):
        # the target is connected later by _initParserContext()
        self._target = target

    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt):
        # Run the base class setup first, then hook up the SAX callbacks.
        # A configured target takes precedence over plain event collection.
        _ParserContext._initParserContext(self, c_ctxt)
        if self._target is not None:
            self._connectTarget(c_ctxt)
        elif self._event_filter:
            self._connectEvents(c_ctxt)

    cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt):
        """Wrap original SAX2 callbacks to call into parser target.

        Callbacks for events that the target did not request in its
        ``_sax_event_filter`` are set to NULL.
        """
        sax = c_ctxt.sax
        # the original start handlers are dropped, not remembered
        self._origSaxStart = sax.startElementNs = NULL
        self._origSaxStartNoNs = sax.startElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_START |
                                             SAX_EVENT_START_NS |
                                             SAX_EVENT_END_NS):
            # intercept => overwrite orig callback
            # FIXME: also intercept on when collecting END events
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.startElementNs = _handleSaxTargetStart
            if self._target._sax_event_filter & SAX_EVENT_START:
                sax.startElement = _handleSaxTargetStartNoNs

        self._origSaxEnd = sax.endElementNs = NULL
        self._origSaxEndNoNs = sax.endElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_END |
                                             SAX_EVENT_END_NS):
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.endElementNs = _handleSaxEnd
            if self._target._sax_event_filter & SAX_EVENT_END:
                sax.endElement = _handleSaxEndNoNs

        # text and CDATA sections share a single handler
        self._origSaxData = sax.characters = sax.cdataBlock = NULL
        if self._target._sax_event_filter & SAX_EVENT_DATA:
            sax.characters = sax.cdataBlock = _handleSaxData

        # doctype propagation is always required for entity replacement
        self._origSaxDoctype = sax.internalSubset
        if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
            sax.internalSubset = _handleSaxTargetDoctype

        self._origSaxPI = sax.processingInstruction = NULL
        if self._target._sax_event_filter & SAX_EVENT_PI:
            sax.processingInstruction = _handleSaxTargetPI

        self._origSaxComment = sax.comment = NULL
        if self._target._sax_event_filter & SAX_EVENT_COMMENT:
            sax.comment = _handleSaxTargetComment

        # enforce entity replacement
        sax.reference = NULL
        c_ctxt.replaceEntities = 1

    cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt):
        """Wrap original SAX2 callbacks to collect parse events without parser target.

        The original callbacks are remembered in the ``_origSax*`` fields
        because the wrapping handlers call them to build the tree.
        """
        sax = c_ctxt.sax
        # startDocument is always wrapped, see _handleSaxStartDocument()
        self._origSaxStartDocument = sax.startDocument
        sax.startDocument = _handleSaxStartDocument

        # only override "start" event handler if needed
        self._origSaxStart = sax.startElementNs
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_START_NS |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart

        self._origSaxStartNoNs = sax.startElement
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END):
            sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs

        # only override "end" event handler if needed
        self._origSaxEnd = sax.endElementNs
        if self._event_filter == 0 or \
               self._event_filter & (PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd

        self._origSaxEndNoNs = sax.endElement
        if self._event_filter == 0 or \
               self._event_filter & PARSE_EVENT_FILTER_END:
            sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs

        self._origSaxComment = sax.comment
        if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment

        self._origSaxPI = sax.processingInstruction
        if self._event_filter & PARSE_EVENT_FILTER_PI:
            sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent

    cdef _setEventFilter(self, events, tag):
        # Build the event mask from the event names and set up the tag
        # matcher.  No matcher is used without events or for tag None / '*'.
        self._event_filter = _buildParseEventFilter(events)
        if not self._event_filter or tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)

    cdef int startDocument(self, xmlDoc* c_doc) except -1:
        # Wrap the C document that the parser just created.
        # Returns 0, -1 signals an exception.
        try:
            self._doc = _documentFactory(c_doc, self._parser)
        finally:
            self._parser = None  # clear circular reference ASAP
        if self._matcher is not None:
            self._matcher.cacheTags(self._doc, True) # force entry in libxml2 dict
        return 0

    cdef int pushEvent(self, event, xmlNode* c_node) except -1:
        # Append an (event, element) tuple for c_node to the collected events.
        cdef _Element root
        if self._root is None:
            # remember the root element as soon as the document has one
            root = self._doc.getroot()
            if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE:
                self._root = root
        node = _elementFactory(self._doc, c_node)
        self.events_iterator._events.append( (event, node) )
        return 0

    cdef int flushEvents(self) except -1:
        # Emit the 'end' events of all elements that are still on the node
        # stack, followed by the pending namespace end events.
        events = self.events_iterator._events
        while self._node_stack:
            events.append( ('end', self._node_stack.pop()) )
            _pushSaxNsEndEvents(self)
        while self._ns_stack:
            _pushSaxNsEndEvents(self)

    cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt):
        # Called from the SAX callbacks' ``except`` branches.
        # Make sure the parser context reports an error ...
        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
        # stop parsing immediately
        c_ctxt.wellFormed = 0
        c_ctxt.disableSAX = 1
        c_ctxt.instate = xmlparser.XML_PARSER_EOF
        # NOTE(review): _store_raised() is defined outside this section;
        # presumably it keeps the current exception for later re-raising.
        self._store_raised()
250
251
@cython.final
@cython.internal
cdef class _ParseEventsIterator:
    """Iterator over the collected parse events.

    Events are appended to ``_events`` from outside this class while
    ``_event_index`` marks the next event to hand out.  Iteration can be
    resumed after it stopped, once new events were added.
    """
    cdef list _events
    cdef int _event_index

    def __cinit__(self):
        self._event_index = 0
        self._events = []

    def __iter__(self):
        return self

    def __next__(self):
        cdef int pos = self._event_index
        pending = self._events
        if pos >= 2**10 or pos * 2 >= len(pending):
            if pos != 0:
                # drop the events that were already handed out
                del pending[:pos]
                pos = 0
                self._event_index = 0
            # the read position is 0 from here on
            if not pending:
                raise StopIteration
        current = pending[pos]
        self._event_index = pos + 1
        return current
279
280
cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces,
                                 const_xmlChar** c_namespaces):
    """Build [(prefix, uri)] list of declared namespaces.

    ``c_namespaces`` is read as ``c_nb_namespaces`` consecutive
    (prefix, uri) pointer pairs.
    """
    cdef int pair_index
    cdef list prefix_uri_pairs = []
    for pair_index in range(c_nb_namespaces):
        prefix_uri_pairs.append((
            funicodeOrEmpty(c_namespaces[2 * pair_index]),
            funicode(c_namespaces[2 * pair_index + 1]),
        ))
    return prefix_uri_pairs
290
291
cdef void _handleSaxStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) with gil:
    """SAX2 element start callback for collecting events without a target.

    Records 'start-ns' events, calls the original start handler and then
    records the element start, each as selected by the event filter.
    Exceptions are passed to ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    cdef int event_filter = context._event_filter
    try:
        declared_namespaces = None
        if c_nb_namespaces and event_filter & (
                PARSE_EVENT_FILTER_START_NS | PARSE_EVENT_FILTER_END_NS):
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)
            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for ns_pair in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", ns_pair))

        # call the handler that was installed before this one
        context._origSaxStart(
            c_ctxt, c_localname, c_prefix, c_namespace,
            c_nb_namespaces, c_namespaces,
            c_nb_attributes, c_nb_defaulted, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)

        if event_filter & PARSE_EVENT_FILTER_END_NS:
            # one entry per element, popped again by _pushSaxNsEndEvents()
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END | PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace, c_localname, None)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
332
333
cdef void _handleSaxTargetStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) with gil:
    # SAX2 element start callback used when parsing into a parser target.
    # Reports namespace declarations and the element start to the target
    # and to the events iterator, as selected by the two filters.
    # Exceptions are passed to context._handleSaxException().
    cdef int i
    cdef size_t c_len
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL or c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private

    cdef int event_filter = context._event_filter
    cdef int sax_event_filter = context._target._sax_event_filter
    try:
        if c_nb_namespaces:
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)

            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for prefix_uri_tuple in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", prefix_uri_tuple))

            if sax_event_filter & SAX_EVENT_START_NS:
                for prefix, uri in declared_namespaces:
                    context._target._handleSaxStartNs(prefix, uri)
                #if not context._target._sax_event_filter & SAX_EVENT_START:
                #    # *Only* collecting start-ns events.
                #    return
        else:
            declared_namespaces = None

        if sax_event_filter & SAX_EVENT_START:
            if c_nb_defaulted > 0:
                # only add default attributes if we asked for them
                if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
                    c_nb_attributes -= c_nb_defaulted
            if c_nb_attributes == 0:
                attrib = IMMUTABLE_EMPTY_MAPPING
            else:
                attrib = {}
                # Each attribute takes five consecutive pointers.  As used
                # here: [0] is the local name, [2] the namespace and
                # [3]/[4] the start and end of the value.
                for i in xrange(c_nb_attributes):
                    name = _namespacedNameFromNsName(
                        c_attributes[2], c_attributes[0])
                    if c_attributes[3] is NULL:
                        value = ''
                    else:
                        c_len = c_attributes[4] - c_attributes[3]
                        value = c_attributes[3][:c_len].decode('utf8')
                    attrib[name] = value
                    c_attributes += 5

            nsmap = dict(declared_namespaces) if c_nb_namespaces else IMMUTABLE_EMPTY_MAPPING

            element = _callTargetSaxStart(
                context, c_ctxt,
                _namespacedNameFromNsName(c_namespace, c_localname),
                attrib, nsmap)
        else:
            element = None

        if (event_filter & PARSE_EVENT_FILTER_END_NS or
                sax_event_filter & SAX_EVENT_END_NS):
            # one entry per element, popped again by _pushSaxNsEndEvents()
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END |
                           PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace,
                               c_localname, element)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
407
408
cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
                              const_xmlChar** c_attributes) with gil:
    """Non-namespace element start callback for collecting events.

    Calls the original start handler and then records the element start
    if 'start' or 'end' events were requested.  Exceptions are passed to
    ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        # call the handler that was installed before this one
        context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
        if context._event_filter & (
                PARSE_EVENT_FILTER_END | PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, None)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
426
427
cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
                                    const_xmlChar** c_attributes) with gil:
    """Non-namespace element start callback used with a parser target.

    Decodes the NULL terminated (name, value) attribute array, passes the
    element start to the target and records it if 'start' or 'end' events
    were requested.  Exceptions are passed to
    ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        attrib = IMMUTABLE_EMPTY_MAPPING
        if c_attributes is not NULL:
            attrib = {}
            while c_attributes[0] is not NULL:
                attr_name = funicode(c_attributes[0])
                attrib[attr_name] = funicodeOrEmpty(c_attributes[1])
                c_attributes += 2
        tag = funicode(c_name)
        element = _callTargetSaxStart(
            context, c_ctxt, tag, attrib, IMMUTABLE_EMPTY_MAPPING)
        if context._event_filter & (
                PARSE_EVENT_FILTER_END | PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, element)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
453
454
cdef _callTargetSaxStart(_SaxParserContext context,
                         xmlparser.xmlParserCtxt* c_ctxt,
                         tag, attrib, nsmap):
    """Pass an element start to the parser target and return its result.

    If the target returned an _Element and the parser has a current input,
    the input line number is stored on the element's C node, capped at 65535.
    """
    element = context._target._handleSaxStart(tag, attrib, nsmap)
    if element is None or c_ctxt.input is NULL:
        return element
    if not isinstance(element, _Element):
        return element
    if c_ctxt.input.line < 65535:
        (<_Element>element)._c_node.line = <unsigned short>c_ctxt.input.line
    else:
        (<_Element>element)._c_node.line = 65535
    return element
465
466
cdef int _pushSaxStartEvent(_SaxParserContext context,
                            xmlparser.xmlParserCtxt* c_ctxt,
                            const_xmlChar* c_href,
                            const_xmlChar* c_name, node) except -1:
    """Record the start of an element that passes the tag matcher.

    Appends a ('start', node) event if 'start' events were requested.
    Without a parser target, the node is also pushed on the node stack
    when 'end' events were requested.  Returns 0, -1 signals an exception.
    """
    matcher = context._matcher
    if matcher is not None and not matcher.matchesNsTag(c_href, c_name):
        return 0

    cdef bint without_target = context._target is None
    if node is None and without_target:
        assert context._doc is not None
        node = _elementFactory(context._doc, c_ctxt.node)
    if context._event_filter & PARSE_EVENT_FILTER_START:
        context.events_iterator._events.append(('start', node))
    if without_target and context._event_filter & PARSE_EVENT_FILTER_END:
        # remembered for the matching 'end' event
        context._node_stack.append(node)
    return 0
482
483
cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
                        const_xmlChar* c_prefix,
                        const_xmlChar* c_namespace) with gil:
    """SAX2 element end callback, used with and without a parser target.

    With a target, the end tag is passed to it if it requested end events.
    Otherwise the original end handler is called.  Afterwards, the 'end'
    and namespace end events are recorded.  Exceptions are passed to
    ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        node = None
        if context._target is None:
            context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
        elif context._target._sax_event_filter & SAX_EVENT_END:
            tag = _namespacedNameFromNsName(c_namespace, c_localname)
            node = context._target._handleSaxEnd(tag)
        _pushSaxEndEvent(context, c_namespace, c_localname, node)
        _pushSaxNsEndEvents(context)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
507
508
cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) with gil:
    """Non-namespace element end callback, used with and without a target.

    Passes the end tag to the parser target if there is one, otherwise
    calls the original end handler.  Then records the 'end' event.
    Exceptions are passed to ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if context._target is None:
            context._origSaxEndNoNs(c_ctxt, c_name)
            node = None
        else:
            node = context._target._handleSaxEnd(funicode(c_name))
        _pushSaxEndEvent(context, NULL, c_name, node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
525
526
cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1:
    """Report the end of the namespaces declared by the element being closed.

    Pops one entry from the namespace stack.  For each declared namespace,
    in reverse order, the parser target is notified and/or an
    ('end-ns', None) event is recorded, depending on what was requested.
    Does nothing if neither was requested.  Returns 0, -1 signals an
    exception.
    """
    cdef list closing_namespaces
    cdef tuple ns_pair
    cdef bint wants_events = context._event_filter & PARSE_EVENT_FILTER_END_NS
    cdef bint notify_target = (
        context._target is not None
        and context._target._sax_event_filter & SAX_EVENT_END_NS)
    if not (wants_events or notify_target):
        return 0

    closing_namespaces = context._ns_stack.pop()
    if closing_namespaces is None:
        # the element did not declare any namespaces
        return 0

    for ns_pair in reversed(closing_namespaces):
        if notify_target:
            context._target._handleSaxEndNs(ns_pair[0])
        if wants_events:
            context.events_iterator._events.append(('end-ns', None))

    return 0
547
548
cdef int _pushSaxEndEvent(_SaxParserContext context,
                          const_xmlChar* c_href,
                          const_xmlChar* c_name, node) except -1:
    """Record an 'end' event for an element that passes the tag matcher.

    Without a parser target, the node is taken from the node stack instead
    of the ``node`` argument.  Returns 0, -1 signals an exception.
    """
    if not (context._event_filter & PARSE_EVENT_FILTER_END):
        return 0
    matcher = context._matcher
    if matcher is not None and not matcher.matchesNsTag(c_href, c_name):
        return 0
    if context._target is None:
        node = context._node_stack.pop()
    context.events_iterator._events.append(('end', node))
    return 0
559
560
cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) with gil:
    """Character data callback: pass the UTF-8 decoded text to the target.

    Exceptions are passed to ``context._handleSaxException()``.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        text = c_data[:data_len].decode('utf8')
        context._target._handleSaxData(text)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
574
575
cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
                                  const_xmlChar* c_public,
                                  const_xmlChar* c_system) with gil:
    """Doctype callback: pass root tag, public ID and system ID to the target.

    Exceptions are passed to ``context._handleSaxException()``.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        root_tag = funicodeOrNone(c_name)
        public_id = funicodeOrNone(c_public)
        system_id = funicodeOrNone(c_system)
        context._target._handleSaxDoctype(root_tag, public_id, system_id)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
593
594
cdef void _handleSaxStartDocument(void* ctxt) with gil:
    """Document start callback used when collecting events.

    Calls the original handler and then passes the parser's document to
    ``context.startDocument()``.  Exceptions raised by the latter are
    passed to ``context._handleSaxException()``.
    """
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    # call the handler that was installed before this one
    context._origSaxStartDocument(ctxt)
    try:
        context.startDocument(c_ctxt.myDoc)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
608
609
cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
                             const_xmlChar* c_data) with gil:
    """Processing instruction callback used with a parser target.

    Passes the PI to the target and records a 'pi' event with the target's
    return value if 'pi' events were requested.  Exceptions are passed to
    ``context._handleSaxException()``.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        pi_target = funicodeOrNone(c_target)
        pi_data = funicodeOrEmpty(c_data)
        pi = context._target._handleSaxPi(pi_target, pi_data)
        if context._event_filter & PARSE_EVENT_FILTER_PI:
            context.events_iterator._events.append(('pi', pi))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
627
628
cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
                            const_xmlChar* data) with gil:
    """Processing instruction callback used when collecting 'pi' events.

    Calls the original handler and records a 'pi' event for the node that
    ``_findLastEventNode()`` returns, if any.  Exceptions raised while
    recording are passed to ``context._handleSaxException()``.
    """
    # can only be called when collecting pi events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    # call the handler that was installed before this one
    context._origSaxPI(ctxt, target, data)
    c_pi_node = _findLastEventNode(c_ctxt)
    if c_pi_node is NULL:
        return
    try:
        context.pushEvent('pi', c_pi_node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
646
647
cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) with gil:
    """Comment callback used with a parser target.

    Passes the comment text to the target and records a 'comment' event
    with the target's return value if 'comment' events were requested.
    Exceptions are passed to ``context._handleSaxException()``.
    """
    # can only be called if parsing with a target
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        comment_text = funicodeOrEmpty(c_data)
        comment = context._target._handleSaxComment(comment_text)
        if context._event_filter & PARSE_EVENT_FILTER_COMMENT:
            context.events_iterator._events.append(('comment', comment))
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
662
663
cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) with gil:
    """Comment callback used when collecting 'comment' events.

    Calls the original handler and records a 'comment' event for the node
    that ``_findLastEventNode()`` returns, if any.  Exceptions raised while
    recording are passed to ``context._handleSaxException()``.
    """
    # can only be called when collecting comment events
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if c_ctxt._private is NULL:
        return
    if c_ctxt.disableSAX:
        return
    context = <_SaxParserContext>c_ctxt._private
    # call the handler that was installed before this one
    context._origSaxComment(ctxt, text)
    c_comment_node = _findLastEventNode(c_ctxt)
    if c_comment_node is NULL:
        return
    try:
        context.pushEvent('comment', c_comment_node)
    except:
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
680
681
cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt):
    """Return the node that the parser created last for a comment or PI.

    The result depends on the parser position: inside the internal or
    external subset, at document level, or below/after the current node.
    """
    # this mimics what libxml2 creates for comments/PIs
    if c_ctxt.inSubset == 1:
        return c_ctxt.myDoc.intSubset.last
    if c_ctxt.inSubset == 2:
        return c_ctxt.myDoc.extSubset.last
    if c_ctxt.node is NULL:
        return c_ctxt.myDoc.last
    if c_ctxt.node.type == tree.XML_ELEMENT_NODE:
        return c_ctxt.node.last
    return c_ctxt.node.next
694
695
696############################################################
697## ET compatible XML tree builder
698############################################################
699
cdef class TreeBuilder(_SaxParserTarget):
    u"""TreeBuilder(self, element_factory=None, parser=None,
                    comment_factory=None, pi_factory=None,
                    insert_comments=True, insert_pis=True)

    Parser target that builds a tree from parse event callbacks.

    The factory arguments can be used to influence the creation of
    elements, comments and processing instructions.

    By default, comments and processing instructions are inserted into
    the tree, but they can be ignored by passing the respective flags.

    The final tree is returned by the ``close()`` method.
    """
    cdef _BaseParser _parser
    cdef object _factory
    cdef object _comment_factory
    cdef object _pi_factory
    cdef list _data
    cdef list _element_stack
    cdef object _element_stack_pop
    cdef _Element _last # may be None
    cdef bint _in_tail
    cdef bint _insert_comments
    cdef bint _insert_pis

    def __init__(self, *, element_factory=None, parser=None,
                 comment_factory=None, pi_factory=None,
                 bint insert_comments=True, bint insert_pis=True):
        # SAX events that this target wants to receive
        self._sax_event_filter = \
            SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
            SAX_EVENT_PI | SAX_EVENT_COMMENT
        self._data = [] # data collector
        self._element_stack = [] # element stack
        self._element_stack_pop = self._element_stack.pop
        self._last = None # last element
        self._in_tail = 0 # true if we're after an end tag
        self._factory = element_factory
        self._comment_factory = comment_factory if comment_factory is not None else Comment
        self._pi_factory = pi_factory if pi_factory is not None else ProcessingInstruction
        self._insert_comments = insert_comments
        self._insert_pis = insert_pis
        self._parser = parser

    @cython.final
    cdef int _flush(self) except -1:
        # Assign the collected character data to the last node, as its tail
        # if an end tag (or inserted comment/PI) was seen last and as its
        # text otherwise.  Data collected before any node is discarded.
        if self._data:
            if self._last is not None:
                text = u"".join(self._data)
                if self._in_tail:
                    assert self._last.tail is None, u"internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, u"internal error (text)"
                    self._last.text = text
            del self._data[:]
        return 0

    # internal SAX event handlers

    @cython.final
    cdef _handleSaxStart(self, tag, attrib, nsmap):
        # Create the new element, append it to the currently open element
        # (if any) and make it the new current element.
        self._flush()
        if self._factory is not None:
            # note that the user provided factory does not receive the nsmap
            self._last = self._factory(tag, attrib)
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
        elif self._element_stack:
            self._last = _makeSubElement(
                self._element_stack[-1], tag, None, None, attrib, nsmap, None)
        else:
            self._last = _makeElement(
                tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
        self._element_stack.append(self._last)
        self._in_tail = 0
        return self._last

    @cython.final
    cdef _handleSaxEnd(self, tag):
        # Close the current element.  Following data becomes its tail.
        self._flush()
        self._last = self._element_stack_pop()
        self._in_tail = 1
        return self._last

    @cython.final
    cdef int _handleSaxData(self, data) except -1:
        # Only collect the data here, _flush() assigns it later.
        self._data.append(data)
        return 0

    @cython.final
    cdef _handleSaxPi(self, target, data):
        # Create the PI and insert it into the tree unless disabled.
        elem = self._pi_factory(target, data)
        if self._insert_pis:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        # Always return the new PI, as _handleSaxComment() does for comments.
        # Returning self._last would hand back an unrelated node (or None)
        # when PI insertion is disabled.
        return elem

    @cython.final
    cdef _handleSaxComment(self, comment):
        # Create the comment and insert it into the tree unless disabled.
        elem = self._comment_factory(comment)
        if self._insert_comments:
            self._flush()
            self._last = elem
            if self._element_stack:
                _appendChild(self._element_stack[-1], self._last)
            self._in_tail = 1
        return elem

    # Python level event handlers

    def close(self):
        u"""close(self)

        Flushes the builder buffers, and returns the toplevel document
        element.  Raises XMLSyntaxError on inconsistencies.
        """
        if self._element_stack:
            raise XMLSyntaxAssertionError("missing end tags")
        # TODO: this does not necessarily seem like an error case.  Why not just return None?
        if self._last is None:
            raise XMLSyntaxAssertionError("missing toplevel element")
        return self._last

    def data(self, data):
        u"""data(self, data)

        Adds text to the current element.  The value should be either an
        8-bit string containing ASCII text, or a Unicode string.
        """
        self._handleSaxData(data)

    def start(self, tag, attrs, nsmap=None):
        u"""start(self, tag, attrs, nsmap=None)

        Opens a new element and returns it.
        """
        if nsmap is None:
            nsmap = IMMUTABLE_EMPTY_MAPPING
        return self._handleSaxStart(tag, attrs, nsmap)

    def end(self, tag):
        u"""end(self, tag)

        Closes the current element and returns it.
        """
        element = self._handleSaxEnd(tag)
        assert self._last.tag == tag,\
            f"end tag mismatch (expected {self._last.tag}, got {tag})"
        return element

    def pi(self, target, data=None):
        u"""pi(self, target, data=None)

        Creates a processing instruction using the factory, appends it
        (unless disabled) and returns it.
        """
        return self._handleSaxPi(target, data)

    def comment(self, comment):
        u"""comment(self, comment)

        Creates a comment using the factory, appends it (unless disabled)
        and returns it.
        """
        return self._handleSaxComment(comment)