1 /*--------------------------------------------------------------------
2  * Licensed to PSF under a Contributor Agreement.
3  * See http://www.python.org/psf/license for licensing details.
4  *
5  * _elementtree - C accelerator for xml.etree.ElementTree
6  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7  * Copyright (c) 1999-2009 by Fredrik Lundh.
8  *
9  * info@pythonware.com
10  * http://www.pythonware.com
11  *--------------------------------------------------------------------
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 
16 #include "Python.h"
17 #include "structmember.h"
18 
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21 
22 /* An element can hold this many children without extra memory
23    allocations. */
24 #define STATIC_CHILDREN 4
25 
/* For best performance, choose a value so that 80-90% of all nodes
27    have no more than the given number of children.  Set this to zero
28    to minimize the size of the element structure itself (this only
29    helps if you have lots of leaf nodes with attributes). */
30 
31 /* Also note that pymalloc always allocates blocks in multiples of
32    eight bytes.  For the current C version of ElementTree, this means
33    that the number of children should be an even number, at least on
34    32-bit platforms. */
35 
36 /* -------------------------------------------------------------------- */
37 
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte total in 'memory' and print it together with
   the given comment string.  In the configuration compiled here (the #else
   branch) both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48 
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   'type'.  Under MSVC the function is additionally marked __inline and
   __fastcall; everywhere else it is a plain static function. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55 
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   The flag lives in the lowest bit of the pointer value:
     JOIN_GET(p)        - the flag bit of p (0 or 1)
     JOIN_SET(p, flag)  - p with its old flag stripped and 'flag' or'ed in
     JOIN_OBJ(p)        - p with the flag bit masked off, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
63 
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68     PyObject *tmp = JOIN_OBJ(*p);
69     *p = new_joined_ptr;
70     Py_DECREF(tmp);
71 }
72 
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74  * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78     if (*p) {
79         _set_joined_ptr(p, NULL);
80     }
81 }
82 
/* Types defined by this extension.  These are declarations only, so that
   code further down can refer to the type objects by address. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88 
89 
/* Per-module state; PEP 3121.
 *
 * Every field is a reference owned by the module state (or NULL): all of
 * them are reported by elementtree_traverse() and released by
 * elementtree_clear().
 */
typedef struct {
    PyObject *parseerror_obj;
    /* callable used by deepcopy() for the general case; it is invoked with
       the arguments (object, memo) */
    PyObject *deepcopy_obj;
    PyObject *elementpath_obj;
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
98 
/* Declaration of the module definition; ET_STATE_GLOBAL needs its address. */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 *
 * NOTE(review): the result of PyState_FindModule() is passed on without a
 * NULL check -- confirm the module is always registered before this is used.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111 
112 static int
elementtree_clear(PyObject * m)113 elementtree_clear(PyObject *m)
114 {
115     elementtreestate *st = ET_STATE(m);
116     Py_CLEAR(st->parseerror_obj);
117     Py_CLEAR(st->deepcopy_obj);
118     Py_CLEAR(st->elementpath_obj);
119     Py_CLEAR(st->comment_factory);
120     Py_CLEAR(st->pi_factory);
121     return 0;
122 }
123 
124 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)125 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126 {
127     elementtreestate *st = ET_STATE(m);
128     Py_VISIT(st->parseerror_obj);
129     Py_VISIT(st->deepcopy_obj);
130     Py_VISIT(st->elementpath_obj);
131     Py_VISIT(st->comment_factory);
132     Py_VISIT(st->pi_factory);
133     return 0;
134 }
135 
136 static void
elementtree_free(void * m)137 elementtree_free(void *m)
138 {
139     elementtree_clear((PyObject *)m);
140 }
141 
142 /* helpers */
143 
144 LOCAL(PyObject*)
list_join(PyObject * list)145 list_join(PyObject* list)
146 {
147     /* join list elements */
148     PyObject* joiner;
149     PyObject* result;
150 
151     joiner = PyUnicode_FromStringAndSize("", 0);
152     if (!joiner)
153         return NULL;
154     result = PyUnicode_Join(joiner, list);
155     Py_DECREF(joiner);
156     return result;
157 }
158 
159 /* Is the given object an empty dictionary?
160 */
161 static int
is_empty_dict(PyObject * obj)162 is_empty_dict(PyObject *obj)
163 {
164     return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
165 }
166 
167 
168 /* -------------------------------------------------------------------- */
169 /* the Element type */
170 
/* Out-of-line part of an element: its attributes and its children.  It is
   allocated by create_extra() and released by dealloc_extra(). */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer
       (create_extra() starts it at _children; element_resize() switches to
       a heap buffer once more room is needed) */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
186 
/* Instance layout of an Element object. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; stays NULL until create_extra() has been
       called for this element */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
210 
211 
/* Element_CheckExact: true only when the object's type is Element_Type
   itself.  Element_Check: type test via PyObject_TypeCheck against
   Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214 
215 
216 /* -------------------------------------------------------------------- */
217 /* Element constructors and destructor */
218 
219 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)220 create_extra(ElementObject* self, PyObject* attrib)
221 {
222     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
223     if (!self->extra) {
224         PyErr_NoMemory();
225         return -1;
226     }
227 
228     if (!attrib)
229         attrib = Py_None;
230 
231     Py_INCREF(attrib);
232     self->extra->attrib = attrib;
233 
234     self->extra->length = 0;
235     self->extra->allocated = STATIC_CHILDREN;
236     self->extra->children = self->extra->_children;
237 
238     return 0;
239 }
240 
241 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)242 dealloc_extra(ElementObjectExtra *extra)
243 {
244     Py_ssize_t i;
245 
246     if (!extra)
247         return;
248 
249     Py_DECREF(extra->attrib);
250 
251     for (i = 0; i < extra->length; i++)
252         Py_DECREF(extra->children[i]);
253 
254     if (extra->children != extra->_children)
255         PyObject_Free(extra->children);
256 
257     PyObject_Free(extra);
258 }
259 
260 LOCAL(void)
clear_extra(ElementObject * self)261 clear_extra(ElementObject* self)
262 {
263     ElementObjectExtra *myextra;
264 
265     if (!self->extra)
266         return;
267 
268     /* Avoid DECREFs calling into this code again (cycles, etc.)
269     */
270     myextra = self->extra;
271     self->extra = NULL;
272 
273     dealloc_extra(myextra);
274 }
275 
276 /* Convenience internal function to create new Element objects with the given
277  * tag and attributes.
278 */
279 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)280 create_new_element(PyObject* tag, PyObject* attrib)
281 {
282     ElementObject* self;
283 
284     self = PyObject_GC_New(ElementObject, &Element_Type);
285     if (self == NULL)
286         return NULL;
287     self->extra = NULL;
288 
289     Py_INCREF(tag);
290     self->tag = tag;
291 
292     Py_INCREF(Py_None);
293     self->text = Py_None;
294 
295     Py_INCREF(Py_None);
296     self->tail = Py_None;
297 
298     self->weakreflist = NULL;
299 
300     ALLOC(sizeof(ElementObject), "create element");
301     PyObject_GC_Track(self);
302 
303     if (attrib != Py_None && !is_empty_dict(attrib)) {
304         if (create_extra(self, attrib) < 0) {
305             Py_DECREF(self);
306             return NULL;
307         }
308     }
309 
310     return (PyObject*) self;
311 }
312 
313 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)314 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315 {
316     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317     if (e != NULL) {
318         Py_INCREF(Py_None);
319         e->tag = Py_None;
320 
321         Py_INCREF(Py_None);
322         e->text = Py_None;
323 
324         Py_INCREF(Py_None);
325         e->tail = Py_None;
326 
327         e->extra = NULL;
328         e->weakreflist = NULL;
329     }
330     return (PyObject *)e;
331 }
332 
333 /* Helper function for extracting the attrib dictionary from a keywords dict.
334  * This is required by some constructors/functions in this module that can
335  * either accept attrib as a keyword argument or all attributes splashed
336  * directly into *kwds.
337  *
338  * Return a dictionary with the content of kwds merged into the content of
339  * attrib. If there is no attrib keyword, return a copy of kwds.
340  */
341 static PyObject*
get_attrib_from_keywords(PyObject * kwds)342 get_attrib_from_keywords(PyObject *kwds)
343 {
344     PyObject *attrib_str = PyUnicode_FromString("attrib");
345     if (attrib_str == NULL) {
346         return NULL;
347     }
348     PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
349 
350     if (attrib) {
351         /* If attrib was found in kwds, copy its value and remove it from
352          * kwds
353          */
354         if (!PyDict_Check(attrib)) {
355             Py_DECREF(attrib_str);
356             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357                          Py_TYPE(attrib)->tp_name);
358             return NULL;
359         }
360         attrib = PyDict_Copy(attrib);
361         if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362             Py_DECREF(attrib);
363             attrib = NULL;
364         }
365     }
366     else if (!PyErr_Occurred()) {
367         attrib = PyDict_New();
368     }
369 
370     Py_DECREF(attrib_str);
371 
372     if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373         Py_DECREF(attrib);
374         return NULL;
375     }
376     return attrib;
377 }
378 
379 /*[clinic input]
380 module _elementtree
381 class _elementtree.Element "ElementObject *" "&Element_Type"
382 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384 [clinic start generated code]*/
385 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386 
387 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)388 element_init(PyObject *self, PyObject *args, PyObject *kwds)
389 {
390     PyObject *tag;
391     PyObject *attrib = NULL;
392     ElementObject *self_elem;
393 
394     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395         return -1;
396 
397     if (attrib) {
398         /* attrib passed as positional arg */
399         attrib = PyDict_Copy(attrib);
400         if (!attrib)
401             return -1;
402         if (kwds) {
403             if (PyDict_Update(attrib, kwds) < 0) {
404                 Py_DECREF(attrib);
405                 return -1;
406             }
407         }
408     } else if (kwds) {
409         /* have keywords args */
410         attrib = get_attrib_from_keywords(kwds);
411         if (!attrib)
412             return -1;
413     }
414 
415     self_elem = (ElementObject *)self;
416 
417     if (attrib != NULL && !is_empty_dict(attrib)) {
418         if (create_extra(self_elem, attrib) < 0) {
419             Py_DECREF(attrib);
420             return -1;
421         }
422     }
423 
424     /* We own a reference to attrib here and it's no longer needed. */
425     Py_XDECREF(attrib);
426 
427     /* Replace the objects already pointed to by tag, text and tail. */
428     Py_INCREF(tag);
429     Py_XSETREF(self_elem->tag, tag);
430 
431     Py_INCREF(Py_None);
432     _set_joined_ptr(&self_elem->text, Py_None);
433 
434     Py_INCREF(Py_None);
435     _set_joined_ptr(&self_elem->tail, Py_None);
436 
437     return 0;
438 }
439 
440 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)441 element_resize(ElementObject* self, Py_ssize_t extra)
442 {
443     Py_ssize_t size;
444     PyObject* *children;
445 
446     assert(extra >= 0);
447     /* make sure self->children can hold the given number of extra
448        elements.  set an exception and return -1 if allocation failed */
449 
450     if (!self->extra) {
451         if (create_extra(self, NULL) < 0)
452             return -1;
453     }
454 
455     size = self->extra->length + extra;  /* never overflows */
456 
457     if (size > self->extra->allocated) {
458         /* use Python 2.4's list growth strategy */
459         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
460         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
461          * which needs at least 4 bytes.
462          * Although it's a false alarm always assume at least one child to
463          * be safe.
464          */
465         size = size ? size : 1;
466         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467             goto nomemory;
468         if (self->extra->children != self->extra->_children) {
469             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
470              * "children", which needs at least 4 bytes. Although it's a
471              * false alarm always assume at least one child to be safe.
472              */
473             children = PyObject_Realloc(self->extra->children,
474                                         size * sizeof(PyObject*));
475             if (!children)
476                 goto nomemory;
477         } else {
478             children = PyObject_Malloc(size * sizeof(PyObject*));
479             if (!children)
480                 goto nomemory;
481             /* copy existing children from static area to malloc buffer */
482             memcpy(children, self->extra->children,
483                    self->extra->length * sizeof(PyObject*));
484         }
485         self->extra->children = children;
486         self->extra->allocated = size;
487     }
488 
489     return 0;
490 
491   nomemory:
492     PyErr_NoMemory();
493     return -1;
494 }
495 
496 LOCAL(void)
raise_type_error(PyObject * element)497 raise_type_error(PyObject *element)
498 {
499     PyErr_Format(PyExc_TypeError,
500                  "expected an Element, not \"%.200s\"",
501                  Py_TYPE(element)->tp_name);
502 }
503 
504 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)505 element_add_subelement(ElementObject* self, PyObject* element)
506 {
507     /* add a child element to a parent */
508 
509     if (!Element_Check(element)) {
510         raise_type_error(element);
511         return -1;
512     }
513 
514     if (element_resize(self, 1) < 0)
515         return -1;
516 
517     Py_INCREF(element);
518     self->extra->children[self->extra->length] = element;
519 
520     self->extra->length++;
521 
522     return 0;
523 }
524 
525 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)526 element_get_attrib(ElementObject* self)
527 {
528     /* return borrowed reference to attrib dictionary */
529     /* note: this function assumes that the extra section exists */
530 
531     PyObject* res = self->extra->attrib;
532 
533     if (res == Py_None) {
534         /* create missing dictionary */
535         res = PyDict_New();
536         if (!res)
537             return NULL;
538         Py_DECREF(Py_None);
539         self->extra->attrib = res;
540     }
541 
542     return res;
543 }
544 
545 LOCAL(PyObject*)
element_get_text(ElementObject * self)546 element_get_text(ElementObject* self)
547 {
548     /* return borrowed reference to text attribute */
549 
550     PyObject *res = self->text;
551 
552     if (JOIN_GET(res)) {
553         res = JOIN_OBJ(res);
554         if (PyList_CheckExact(res)) {
555             PyObject *tmp = list_join(res);
556             if (!tmp)
557                 return NULL;
558             self->text = tmp;
559             Py_DECREF(res);
560             res = tmp;
561         }
562     }
563 
564     return res;
565 }
566 
567 LOCAL(PyObject*)
element_get_tail(ElementObject * self)568 element_get_tail(ElementObject* self)
569 {
570     /* return borrowed reference to text attribute */
571 
572     PyObject *res = self->tail;
573 
574     if (JOIN_GET(res)) {
575         res = JOIN_OBJ(res);
576         if (PyList_CheckExact(res)) {
577             PyObject *tmp = list_join(res);
578             if (!tmp)
579                 return NULL;
580             self->tail = tmp;
581             Py_DECREF(res);
582             res = tmp;
583         }
584     }
585 
586     return res;
587 }
588 
589 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)590 subelement(PyObject *self, PyObject *args, PyObject *kwds)
591 {
592     PyObject* elem;
593 
594     ElementObject* parent;
595     PyObject* tag;
596     PyObject* attrib = NULL;
597     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598                           &Element_Type, &parent, &tag,
599                           &PyDict_Type, &attrib)) {
600         return NULL;
601     }
602 
603     if (attrib) {
604         /* attrib passed as positional arg */
605         attrib = PyDict_Copy(attrib);
606         if (!attrib)
607             return NULL;
608         if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609             Py_DECREF(attrib);
610             return NULL;
611         }
612     } else if (kwds) {
613         /* have keyword args */
614         attrib = get_attrib_from_keywords(kwds);
615         if (!attrib)
616             return NULL;
617     } else {
618         /* no attrib arg, no kwds, so no attribute */
619         Py_INCREF(Py_None);
620         attrib = Py_None;
621     }
622 
623     elem = create_new_element(tag, attrib);
624     Py_DECREF(attrib);
625     if (elem == NULL)
626         return NULL;
627 
628     if (element_add_subelement(parent, elem) < 0) {
629         Py_DECREF(elem);
630         return NULL;
631     }
632 
633     return elem;
634 }
635 
636 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)637 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638 {
639     Py_VISIT(self->tag);
640     Py_VISIT(JOIN_OBJ(self->text));
641     Py_VISIT(JOIN_OBJ(self->tail));
642 
643     if (self->extra) {
644         Py_ssize_t i;
645         Py_VISIT(self->extra->attrib);
646 
647         for (i = 0; i < self->extra->length; ++i)
648             Py_VISIT(self->extra->children[i]);
649     }
650     return 0;
651 }
652 
653 static int
element_gc_clear(ElementObject * self)654 element_gc_clear(ElementObject *self)
655 {
656     Py_CLEAR(self->tag);
657     _clear_joined_ptr(&self->text);
658     _clear_joined_ptr(&self->tail);
659 
660     /* After dropping all references from extra, it's no longer valid anyway,
661      * so fully deallocate it.
662     */
663     clear_extra(self);
664     return 0;
665 }
666 
static void
element_dealloc(ElementObject* self)
{
    /* Destructor for Element objects.  The teardown is bracketed by the
       trashcan macros (CPython's mechanism for bounding recursion depth
       while deeply nested containers are destroyed), so the statement
       order below matters. */

    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_BEGIN(self, element_dealloc)

    /* clear weak references first, while the object is still intact */
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    /* hand the memory back through the type's tp_free slot */
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_END
}
685 
686 /* -------------------------------------------------------------------- */
687 
688 /*[clinic input]
689 _elementtree.Element.append
690 
691     subelement: object(subclass_of='&Element_Type')
692     /
693 
694 [clinic start generated code]*/
695 
696 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)697 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699 {
700     if (element_add_subelement(self, subelement) < 0)
701         return NULL;
702 
703     Py_RETURN_NONE;
704 }
705 
706 /*[clinic input]
707 _elementtree.Element.clear
708 
709 [clinic start generated code]*/
710 
711 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)712 _elementtree_Element_clear_impl(ElementObject *self)
713 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714 {
715     clear_extra(self);
716 
717     Py_INCREF(Py_None);
718     _set_joined_ptr(&self->text, Py_None);
719 
720     Py_INCREF(Py_None);
721     _set_joined_ptr(&self->tail, Py_None);
722 
723     Py_RETURN_NONE;
724 }
725 
726 /*[clinic input]
727 _elementtree.Element.__copy__
728 
729 [clinic start generated code]*/
730 
731 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)732 _elementtree_Element___copy___impl(ElementObject *self)
733 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
734 {
735     Py_ssize_t i;
736     ElementObject* element;
737 
738     element = (ElementObject*) create_new_element(
739         self->tag, (self->extra) ? self->extra->attrib : Py_None);
740     if (!element)
741         return NULL;
742 
743     Py_INCREF(JOIN_OBJ(self->text));
744     _set_joined_ptr(&element->text, self->text);
745 
746     Py_INCREF(JOIN_OBJ(self->tail));
747     _set_joined_ptr(&element->tail, self->tail);
748 
749     assert(!element->extra || !element->extra->length);
750     if (self->extra) {
751         if (element_resize(element, self->extra->length) < 0) {
752             Py_DECREF(element);
753             return NULL;
754         }
755 
756         for (i = 0; i < self->extra->length; i++) {
757             Py_INCREF(self->extra->children[i]);
758             element->extra->children[i] = self->extra->children[i];
759         }
760 
761         assert(!element->extra->length);
762         element->extra->length = self->extra->length;
763     }
764 
765     return (PyObject*) element;
766 }
767 
768 /* Helper for a deep copy. */
769 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770 
771 /*[clinic input]
772 _elementtree.Element.__deepcopy__
773 
774     memo: object(subclass_of="&PyDict_Type")
775     /
776 
777 [clinic start generated code]*/
778 
779 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)780 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
782 {
783     Py_ssize_t i;
784     ElementObject* element;
785     PyObject* tag;
786     PyObject* attrib;
787     PyObject* text;
788     PyObject* tail;
789     PyObject* id;
790 
791     tag = deepcopy(self->tag, memo);
792     if (!tag)
793         return NULL;
794 
795     if (self->extra) {
796         attrib = deepcopy(self->extra->attrib, memo);
797         if (!attrib) {
798             Py_DECREF(tag);
799             return NULL;
800         }
801     } else {
802         Py_INCREF(Py_None);
803         attrib = Py_None;
804     }
805 
806     element = (ElementObject*) create_new_element(tag, attrib);
807 
808     Py_DECREF(tag);
809     Py_DECREF(attrib);
810 
811     if (!element)
812         return NULL;
813 
814     text = deepcopy(JOIN_OBJ(self->text), memo);
815     if (!text)
816         goto error;
817     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
818 
819     tail = deepcopy(JOIN_OBJ(self->tail), memo);
820     if (!tail)
821         goto error;
822     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
823 
824     assert(!element->extra || !element->extra->length);
825     if (self->extra) {
826         if (element_resize(element, self->extra->length) < 0)
827             goto error;
828 
829         for (i = 0; i < self->extra->length; i++) {
830             PyObject* child = deepcopy(self->extra->children[i], memo);
831             if (!child || !Element_Check(child)) {
832                 if (child) {
833                     raise_type_error(child);
834                     Py_DECREF(child);
835                 }
836                 element->extra->length = i;
837                 goto error;
838             }
839             element->extra->children[i] = child;
840         }
841 
842         assert(!element->extra->length);
843         element->extra->length = self->extra->length;
844     }
845 
846     /* add object to memo dictionary (so deepcopy won't visit it again) */
847     id = PyLong_FromSsize_t((uintptr_t) self);
848     if (!id)
849         goto error;
850 
851     i = PyDict_SetItem(memo, id, (PyObject*) element);
852 
853     Py_DECREF(id);
854 
855     if (i < 0)
856         goto error;
857 
858     return (PyObject*) element;
859 
860   error:
861     Py_DECREF(element);
862     return NULL;
863 }
864 
865 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)866 deepcopy(PyObject *object, PyObject *memo)
867 {
868     /* do a deep copy of the given object */
869     elementtreestate *st;
870     PyObject *stack[2];
871 
872     /* Fast paths */
873     if (object == Py_None || PyUnicode_CheckExact(object)) {
874         Py_INCREF(object);
875         return object;
876     }
877 
878     if (Py_REFCNT(object) == 1) {
879         if (PyDict_CheckExact(object)) {
880             PyObject *key, *value;
881             Py_ssize_t pos = 0;
882             int simple = 1;
883             while (PyDict_Next(object, &pos, &key, &value)) {
884                 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885                     simple = 0;
886                     break;
887                 }
888             }
889             if (simple)
890                 return PyDict_Copy(object);
891             /* Fall through to general case */
892         }
893         else if (Element_CheckExact(object)) {
894             return _elementtree_Element___deepcopy___impl(
895                 (ElementObject *)object, memo);
896         }
897     }
898 
899     /* General case */
900     st = ET_STATE_GLOBAL;
901     if (!st->deepcopy_obj) {
902         PyErr_SetString(PyExc_RuntimeError,
903                         "deepcopy helper not found");
904         return NULL;
905     }
906 
907     stack[0] = object;
908     stack[1] = memo;
909     return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
910 }
911 
912 
913 /*[clinic input]
914 _elementtree.Element.__sizeof__ -> Py_ssize_t
915 
916 [clinic start generated code]*/
917 
918 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)919 _elementtree_Element___sizeof___impl(ElementObject *self)
920 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
921 {
922     Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
923     if (self->extra) {
924         result += sizeof(ElementObjectExtra);
925         if (self->extra->children != self->extra->_children)
926             result += sizeof(PyObject*) * self->extra->allocated;
927     }
928     return result;
929 }
930 
/* dict keys for getstate/setstate.  __getstate__ uses these strings as the
   keys of the state dictionary it builds. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
937 
938 /* __getstate__ returns a fabricated instance dict as in the pure-Python
939  * Element implementation, for interoperability/interchangeability.  This
940  * makes the pure-Python implementation details an API, but (a) there aren't
941  * any unnecessary structures there; and (b) it buys compatibility with 3.2
942  * pickles.  See issue #16076.
943  */
944 /*[clinic input]
945 _elementtree.Element.__getstate__
946 
947 [clinic start generated code]*/
948 
949 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)950 _elementtree_Element___getstate___impl(ElementObject *self)
951 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
952 {
953     Py_ssize_t i;
954     PyObject *children, *attrib;
955 
956     /* Build a list of children. */
957     children = PyList_New(self->extra ? self->extra->length : 0);
958     if (!children)
959         return NULL;
960     for (i = 0; i < PyList_GET_SIZE(children); i++) {
961         PyObject *child = self->extra->children[i];
962         Py_INCREF(child);
963         PyList_SET_ITEM(children, i, child);
964     }
965 
966     if (self->extra && self->extra->attrib != Py_None) {
967         attrib = self->extra->attrib;
968         Py_INCREF(attrib);
969     }
970     else {
971         attrib = PyDict_New();
972         if (!attrib) {
973             Py_DECREF(children);
974             return NULL;
975         }
976     }
977 
978     return Py_BuildValue("{sOsNsNsOsO}",
979                          PICKLED_TAG, self->tag,
980                          PICKLED_CHILDREN, children,
981                          PICKLED_ATTRIB, attrib,
982                          PICKLED_TEXT, JOIN_OBJ(self->text),
983                          PICKLED_TAIL, JOIN_OBJ(self->tail));
984 }
985 
986 static PyObject *
element_setstate_from_attributes(ElementObject * self,PyObject * tag,PyObject * attrib,PyObject * text,PyObject * tail,PyObject * children)987 element_setstate_from_attributes(ElementObject *self,
988                                  PyObject *tag,
989                                  PyObject *attrib,
990                                  PyObject *text,
991                                  PyObject *tail,
992                                  PyObject *children)
993 {
994     Py_ssize_t i, nchildren;
995     ElementObjectExtra *oldextra = NULL;
996 
997     if (!tag) {
998         PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
999         return NULL;
1000     }
1001 
1002     Py_INCREF(tag);
1003     Py_XSETREF(self->tag, tag);
1004 
1005     text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1006     Py_INCREF(JOIN_OBJ(text));
1007     _set_joined_ptr(&self->text, text);
1008 
1009     tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1010     Py_INCREF(JOIN_OBJ(tail));
1011     _set_joined_ptr(&self->tail, tail);
1012 
1013     /* Handle ATTRIB and CHILDREN. */
1014     if (!children && !attrib) {
1015         Py_RETURN_NONE;
1016     }
1017 
1018     /* Compute 'nchildren'. */
1019     if (children) {
1020         if (!PyList_Check(children)) {
1021             PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1022             return NULL;
1023         }
1024         nchildren = PyList_GET_SIZE(children);
1025 
1026         /* (Re-)allocate 'extra'.
1027            Avoid DECREFs calling into this code again (cycles, etc.)
1028          */
1029         oldextra = self->extra;
1030         self->extra = NULL;
1031         if (element_resize(self, nchildren)) {
1032             assert(!self->extra || !self->extra->length);
1033             clear_extra(self);
1034             self->extra = oldextra;
1035             return NULL;
1036         }
1037         assert(self->extra);
1038         assert(self->extra->allocated >= nchildren);
1039         if (oldextra) {
1040             assert(self->extra->attrib == Py_None);
1041             self->extra->attrib = oldextra->attrib;
1042             oldextra->attrib = Py_None;
1043         }
1044 
1045         /* Copy children */
1046         for (i = 0; i < nchildren; i++) {
1047             PyObject *child = PyList_GET_ITEM(children, i);
1048             if (!Element_Check(child)) {
1049                 raise_type_error(child);
1050                 self->extra->length = i;
1051                 dealloc_extra(oldextra);
1052                 return NULL;
1053             }
1054             Py_INCREF(child);
1055             self->extra->children[i] = child;
1056         }
1057 
1058         assert(!self->extra->length);
1059         self->extra->length = nchildren;
1060     }
1061     else {
1062         if (element_resize(self, 0)) {
1063             return NULL;
1064         }
1065     }
1066 
1067     /* Stash attrib. */
1068     if (attrib) {
1069         Py_INCREF(attrib);
1070         Py_XSETREF(self->extra->attrib, attrib);
1071     }
1072     dealloc_extra(oldextra);
1073 
1074     Py_RETURN_NONE;
1075 }
1076 
1077 /* __setstate__ for Element instance from the Python implementation.
1078  * 'state' should be the instance dict.
1079  */
1080 
1081 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1082 element_setstate_from_Python(ElementObject *self, PyObject *state)
1083 {
1084     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1085                              PICKLED_TAIL, PICKLED_CHILDREN, 0};
1086     PyObject *args;
1087     PyObject *tag, *attrib, *text, *tail, *children;
1088     PyObject *retval;
1089 
1090     tag = attrib = text = tail = children = NULL;
1091     args = PyTuple_New(0);
1092     if (!args)
1093         return NULL;
1094 
1095     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1096                                     &attrib, &text, &tail, &children))
1097         retval = element_setstate_from_attributes(self, tag, attrib, text,
1098                                                   tail, children);
1099     else
1100         retval = NULL;
1101 
1102     Py_DECREF(args);
1103     return retval;
1104 }
1105 
1106 /*[clinic input]
1107 _elementtree.Element.__setstate__
1108 
1109     state: object
1110     /
1111 
1112 [clinic start generated code]*/
1113 
1114 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1115 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1116 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1117 {
1118     if (!PyDict_CheckExact(state)) {
1119         PyErr_Format(PyExc_TypeError,
1120                      "Don't know how to unpickle \"%.200R\" as an Element",
1121                      state);
1122         return NULL;
1123     }
1124     else
1125         return element_setstate_from_Python(self, state);
1126 }
1127 
1128 LOCAL(int)
checkpath(PyObject * tag)1129 checkpath(PyObject* tag)
1130 {
1131     Py_ssize_t i;
1132     int check = 1;
1133 
1134     /* check if a tag contains an xpath character */
1135 
1136 #define PATHCHAR(ch) \
1137     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1138 
1139     if (PyUnicode_Check(tag)) {
1140         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1141         void *data = PyUnicode_DATA(tag);
1142         unsigned int kind = PyUnicode_KIND(tag);
1143         if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1144                 PyUnicode_READ(kind, data, 1) == '}' || (
1145                 PyUnicode_READ(kind, data, 1) == '*' &&
1146                 PyUnicode_READ(kind, data, 2) == '}'))) {
1147             /* wildcard: '{}tag' or '{*}tag' */
1148             return 1;
1149         }
1150         for (i = 0; i < len; i++) {
1151             Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1152             if (ch == '{')
1153                 check = 0;
1154             else if (ch == '}')
1155                 check = 1;
1156             else if (check && PATHCHAR(ch))
1157                 return 1;
1158         }
1159         return 0;
1160     }
1161     if (PyBytes_Check(tag)) {
1162         char *p = PyBytes_AS_STRING(tag);
1163         const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1164         if (len >= 3 && p[0] == '{' && (
1165                 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1166             /* wildcard: '{}tag' or '{*}tag' */
1167             return 1;
1168         }
1169         for (i = 0; i < len; i++) {
1170             if (p[i] == '{')
1171                 check = 0;
1172             else if (p[i] == '}')
1173                 check = 1;
1174             else if (check && PATHCHAR(p[i]))
1175                 return 1;
1176         }
1177         return 0;
1178     }
1179 
1180     return 1; /* unknown type; might be path expression */
1181 }
1182 
1183 /*[clinic input]
1184 _elementtree.Element.extend
1185 
1186     elements: object
1187     /
1188 
1189 [clinic start generated code]*/
1190 
1191 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1192 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1193 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1194 {
1195     PyObject* seq;
1196     Py_ssize_t i;
1197 
1198     seq = PySequence_Fast(elements, "");
1199     if (!seq) {
1200         PyErr_Format(
1201             PyExc_TypeError,
1202             "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1203             );
1204         return NULL;
1205     }
1206 
1207     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1208         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1209         Py_INCREF(element);
1210         if (element_add_subelement(self, element) < 0) {
1211             Py_DECREF(seq);
1212             Py_DECREF(element);
1213             return NULL;
1214         }
1215         Py_DECREF(element);
1216     }
1217 
1218     Py_DECREF(seq);
1219 
1220     Py_RETURN_NONE;
1221 }
1222 
1223 /*[clinic input]
1224 _elementtree.Element.find
1225 
1226     path: object
1227     namespaces: object = None
1228 
1229 [clinic start generated code]*/
1230 
1231 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1232 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1233                                PyObject *namespaces)
1234 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1235 {
1236     Py_ssize_t i;
1237     elementtreestate *st = ET_STATE_GLOBAL;
1238 
1239     if (checkpath(path) || namespaces != Py_None) {
1240         _Py_IDENTIFIER(find);
1241         return _PyObject_CallMethodIdObjArgs(
1242             st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1243             );
1244     }
1245 
1246     if (!self->extra)
1247         Py_RETURN_NONE;
1248 
1249     for (i = 0; i < self->extra->length; i++) {
1250         PyObject* item = self->extra->children[i];
1251         int rc;
1252         assert(Element_Check(item));
1253         Py_INCREF(item);
1254         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1255         if (rc > 0)
1256             return item;
1257         Py_DECREF(item);
1258         if (rc < 0)
1259             return NULL;
1260     }
1261 
1262     Py_RETURN_NONE;
1263 }
1264 
1265 /*[clinic input]
1266 _elementtree.Element.findtext
1267 
1268     path: object
1269     default: object = None
1270     namespaces: object = None
1271 
1272 [clinic start generated code]*/
1273 
1274 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1275 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1276                                    PyObject *default_value,
1277                                    PyObject *namespaces)
1278 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1279 {
1280     Py_ssize_t i;
1281     _Py_IDENTIFIER(findtext);
1282     elementtreestate *st = ET_STATE_GLOBAL;
1283 
1284     if (checkpath(path) || namespaces != Py_None)
1285         return _PyObject_CallMethodIdObjArgs(
1286             st->elementpath_obj, &PyId_findtext,
1287             self, path, default_value, namespaces, NULL
1288             );
1289 
1290     if (!self->extra) {
1291         Py_INCREF(default_value);
1292         return default_value;
1293     }
1294 
1295     for (i = 0; i < self->extra->length; i++) {
1296         PyObject *item = self->extra->children[i];
1297         int rc;
1298         assert(Element_Check(item));
1299         Py_INCREF(item);
1300         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1301         if (rc > 0) {
1302             PyObject* text = element_get_text((ElementObject*)item);
1303             if (text == Py_None) {
1304                 Py_DECREF(item);
1305                 return PyUnicode_New(0, 0);
1306             }
1307             Py_XINCREF(text);
1308             Py_DECREF(item);
1309             return text;
1310         }
1311         Py_DECREF(item);
1312         if (rc < 0)
1313             return NULL;
1314     }
1315 
1316     Py_INCREF(default_value);
1317     return default_value;
1318 }
1319 
1320 /*[clinic input]
1321 _elementtree.Element.findall
1322 
1323     path: object
1324     namespaces: object = None
1325 
1326 [clinic start generated code]*/
1327 
1328 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1329 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1330                                   PyObject *namespaces)
1331 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1332 {
1333     Py_ssize_t i;
1334     PyObject* out;
1335     elementtreestate *st = ET_STATE_GLOBAL;
1336 
1337     if (checkpath(path) || namespaces != Py_None) {
1338         _Py_IDENTIFIER(findall);
1339         return _PyObject_CallMethodIdObjArgs(
1340             st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1341             );
1342     }
1343 
1344     out = PyList_New(0);
1345     if (!out)
1346         return NULL;
1347 
1348     if (!self->extra)
1349         return out;
1350 
1351     for (i = 0; i < self->extra->length; i++) {
1352         PyObject* item = self->extra->children[i];
1353         int rc;
1354         assert(Element_Check(item));
1355         Py_INCREF(item);
1356         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1357         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1358             Py_DECREF(item);
1359             Py_DECREF(out);
1360             return NULL;
1361         }
1362         Py_DECREF(item);
1363     }
1364 
1365     return out;
1366 }
1367 
1368 /*[clinic input]
1369 _elementtree.Element.iterfind
1370 
1371     path: object
1372     namespaces: object = None
1373 
1374 [clinic start generated code]*/
1375 
1376 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1377 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1378                                    PyObject *namespaces)
1379 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1380 {
1381     PyObject* tag = path;
1382     _Py_IDENTIFIER(iterfind);
1383     elementtreestate *st = ET_STATE_GLOBAL;
1384 
1385     return _PyObject_CallMethodIdObjArgs(
1386         st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1387 }
1388 
1389 /*[clinic input]
1390 _elementtree.Element.get
1391 
1392     key: object
1393     default: object = None
1394 
1395 [clinic start generated code]*/
1396 
1397 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1398 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1399                               PyObject *default_value)
1400 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1401 {
1402     PyObject* value;
1403 
1404     if (!self->extra || self->extra->attrib == Py_None)
1405         value = default_value;
1406     else {
1407         value = PyDict_GetItemWithError(self->extra->attrib, key);
1408         if (!value) {
1409             if (PyErr_Occurred()) {
1410                 return NULL;
1411             }
1412             value = default_value;
1413         }
1414     }
1415 
1416     Py_INCREF(value);
1417     return value;
1418 }
1419 
1420 /*[clinic input]
1421 _elementtree.Element.getchildren
1422 
1423 [clinic start generated code]*/
1424 
1425 static PyObject *
_elementtree_Element_getchildren_impl(ElementObject * self)1426 _elementtree_Element_getchildren_impl(ElementObject *self)
1427 /*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
1428 {
1429     Py_ssize_t i;
1430     PyObject* list;
1431 
1432     if (PyErr_WarnEx(PyExc_DeprecationWarning,
1433                      "This method will be removed in future versions.  "
1434                      "Use 'list(elem)' or iteration over elem instead.",
1435                      1) < 0) {
1436         return NULL;
1437     }
1438 
1439     if (!self->extra)
1440         return PyList_New(0);
1441 
1442     list = PyList_New(self->extra->length);
1443     if (!list)
1444         return NULL;
1445 
1446     for (i = 0; i < self->extra->length; i++) {
1447         PyObject* item = self->extra->children[i];
1448         Py_INCREF(item);
1449         PyList_SET_ITEM(list, i, item);
1450     }
1451 
1452     return list;
1453 }
1454 
1455 
/* Forward declaration; the definition is not in this part of the file.
 * Called below by Element.iter() with gettext=0 and by Element.itertext()
 * with tag=None and gettext=1.
 * NOTE(review): presumably 'gettext' selects yielding text instead of
 * elements -- confirm against the definition.
 */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1458 
1459 
1460 /*[clinic input]
1461 _elementtree.Element.iter
1462 
1463     tag: object = None
1464 
1465 [clinic start generated code]*/
1466 
1467 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1468 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1469 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1470 {
1471     if (PyUnicode_Check(tag)) {
1472         if (PyUnicode_READY(tag) < 0)
1473             return NULL;
1474         if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1475             tag = Py_None;
1476     }
1477     else if (PyBytes_Check(tag)) {
1478         if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1479             tag = Py_None;
1480     }
1481 
1482     return create_elementiter(self, tag, 0);
1483 }
1484 
1485 
1486 /*[clinic input]
1487 _elementtree.Element.getiterator
1488 
1489     tag: object = None
1490 
1491 [clinic start generated code]*/
1492 
1493 static PyObject *
_elementtree_Element_getiterator_impl(ElementObject * self,PyObject * tag)1494 _elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1495 /*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1496 {
1497     if (PyErr_WarnEx(PyExc_DeprecationWarning,
1498                      "This method will be removed in future versions.  "
1499                      "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1500                      1) < 0) {
1501         return NULL;
1502     }
1503     return _elementtree_Element_iter_impl(self, tag);
1504 }
1505 
1506 
1507 /*[clinic input]
1508 _elementtree.Element.itertext
1509 
1510 [clinic start generated code]*/
1511 
1512 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1513 _elementtree_Element_itertext_impl(ElementObject *self)
1514 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1515 {
1516     return create_elementiter(self, Py_None, 1);
1517 }
1518 
1519 
1520 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1521 element_getitem(PyObject* self_, Py_ssize_t index)
1522 {
1523     ElementObject* self = (ElementObject*) self_;
1524 
1525     if (!self->extra || index < 0 || index >= self->extra->length) {
1526         PyErr_SetString(
1527             PyExc_IndexError,
1528             "child index out of range"
1529             );
1530         return NULL;
1531     }
1532 
1533     Py_INCREF(self->extra->children[index]);
1534     return self->extra->children[index];
1535 }
1536 
1537 /*[clinic input]
1538 _elementtree.Element.insert
1539 
1540     index: Py_ssize_t
1541     subelement: object(subclass_of='&Element_Type')
1542     /
1543 
1544 [clinic start generated code]*/
1545 
1546 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1547 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1548                                  PyObject *subelement)
1549 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1550 {
1551     Py_ssize_t i;
1552 
1553     if (!self->extra) {
1554         if (create_extra(self, NULL) < 0)
1555             return NULL;
1556     }
1557 
1558     if (index < 0) {
1559         index += self->extra->length;
1560         if (index < 0)
1561             index = 0;
1562     }
1563     if (index > self->extra->length)
1564         index = self->extra->length;
1565 
1566     if (element_resize(self, 1) < 0)
1567         return NULL;
1568 
1569     for (i = self->extra->length; i > index; i--)
1570         self->extra->children[i] = self->extra->children[i-1];
1571 
1572     Py_INCREF(subelement);
1573     self->extra->children[index] = subelement;
1574 
1575     self->extra->length++;
1576 
1577     Py_RETURN_NONE;
1578 }
1579 
1580 /*[clinic input]
1581 _elementtree.Element.items
1582 
1583 [clinic start generated code]*/
1584 
1585 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1586 _elementtree_Element_items_impl(ElementObject *self)
1587 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1588 {
1589     if (!self->extra || self->extra->attrib == Py_None)
1590         return PyList_New(0);
1591 
1592     return PyDict_Items(self->extra->attrib);
1593 }
1594 
1595 /*[clinic input]
1596 _elementtree.Element.keys
1597 
1598 [clinic start generated code]*/
1599 
1600 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1601 _elementtree_Element_keys_impl(ElementObject *self)
1602 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1603 {
1604     if (!self->extra || self->extra->attrib == Py_None)
1605         return PyList_New(0);
1606 
1607     return PyDict_Keys(self->extra->attrib);
1608 }
1609 
1610 static Py_ssize_t
element_length(ElementObject * self)1611 element_length(ElementObject* self)
1612 {
1613     if (!self->extra)
1614         return 0;
1615 
1616     return self->extra->length;
1617 }
1618 
1619 /*[clinic input]
1620 _elementtree.Element.makeelement
1621 
1622     tag: object
1623     attrib: object
1624     /
1625 
1626 [clinic start generated code]*/
1627 
1628 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1629 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1630                                       PyObject *attrib)
1631 /*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
1632 {
1633     PyObject* elem;
1634 
1635     attrib = PyDict_Copy(attrib);
1636     if (!attrib)
1637         return NULL;
1638 
1639     elem = create_new_element(tag, attrib);
1640 
1641     Py_DECREF(attrib);
1642 
1643     return elem;
1644 }
1645 
1646 /*[clinic input]
1647 _elementtree.Element.remove
1648 
1649     subelement: object(subclass_of='&Element_Type')
1650     /
1651 
1652 [clinic start generated code]*/
1653 
1654 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1655 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1656 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1657 {
1658     Py_ssize_t i;
1659     int rc;
1660     PyObject *found;
1661 
1662     if (!self->extra) {
1663         /* element has no children, so raise exception */
1664         PyErr_SetString(
1665             PyExc_ValueError,
1666             "list.remove(x): x not in list"
1667             );
1668         return NULL;
1669     }
1670 
1671     for (i = 0; i < self->extra->length; i++) {
1672         if (self->extra->children[i] == subelement)
1673             break;
1674         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1675         if (rc > 0)
1676             break;
1677         if (rc < 0)
1678             return NULL;
1679     }
1680 
1681     if (i >= self->extra->length) {
1682         /* subelement is not in children, so raise exception */
1683         PyErr_SetString(
1684             PyExc_ValueError,
1685             "list.remove(x): x not in list"
1686             );
1687         return NULL;
1688     }
1689 
1690     found = self->extra->children[i];
1691 
1692     self->extra->length--;
1693     for (; i < self->extra->length; i++)
1694         self->extra->children[i] = self->extra->children[i+1];
1695 
1696     Py_DECREF(found);
1697     Py_RETURN_NONE;
1698 }
1699 
1700 static PyObject*
element_repr(ElementObject * self)1701 element_repr(ElementObject* self)
1702 {
1703     int status;
1704 
1705     if (self->tag == NULL)
1706         return PyUnicode_FromFormat("<Element at %p>", self);
1707 
1708     status = Py_ReprEnter((PyObject *)self);
1709     if (status == 0) {
1710         PyObject *res;
1711         res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1712         Py_ReprLeave((PyObject *)self);
1713         return res;
1714     }
1715     if (status > 0)
1716         PyErr_Format(PyExc_RuntimeError,
1717                      "reentrant call inside %s.__repr__",
1718                      Py_TYPE(self)->tp_name);
1719     return NULL;
1720 }
1721 
1722 /*[clinic input]
1723 _elementtree.Element.set
1724 
1725     key: object
1726     value: object
1727     /
1728 
1729 [clinic start generated code]*/
1730 
1731 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1732 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1733                               PyObject *value)
1734 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1735 {
1736     PyObject* attrib;
1737 
1738     if (!self->extra) {
1739         if (create_extra(self, NULL) < 0)
1740             return NULL;
1741     }
1742 
1743     attrib = element_get_attrib(self);
1744     if (!attrib)
1745         return NULL;
1746 
1747     if (PyDict_SetItem(attrib, key, value) < 0)
1748         return NULL;
1749 
1750     Py_RETURN_NONE;
1751 }
1752 
1753 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1754 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1755 {
1756     ElementObject* self = (ElementObject*) self_;
1757     Py_ssize_t i;
1758     PyObject* old;
1759 
1760     if (!self->extra || index < 0 || index >= self->extra->length) {
1761         PyErr_SetString(
1762             PyExc_IndexError,
1763             "child assignment index out of range");
1764         return -1;
1765     }
1766 
1767     old = self->extra->children[index];
1768 
1769     if (item) {
1770         if (!Element_Check(item)) {
1771             raise_type_error(item);
1772             return -1;
1773         }
1774         Py_INCREF(item);
1775         self->extra->children[index] = item;
1776     } else {
1777         self->extra->length--;
1778         for (i = index; i < self->extra->length; i++)
1779             self->extra->children[i] = self->extra->children[i+1];
1780     }
1781 
1782     Py_DECREF(old);
1783 
1784     return 0;
1785 }
1786 
1787 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1788 element_subscr(PyObject* self_, PyObject* item)
1789 {
1790     ElementObject* self = (ElementObject*) self_;
1791 
1792     if (PyIndex_Check(item)) {
1793         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1794 
1795         if (i == -1 && PyErr_Occurred()) {
1796             return NULL;
1797         }
1798         if (i < 0 && self->extra)
1799             i += self->extra->length;
1800         return element_getitem(self_, i);
1801     }
1802     else if (PySlice_Check(item)) {
1803         Py_ssize_t start, stop, step, slicelen, i;
1804         size_t cur;
1805         PyObject* list;
1806 
1807         if (!self->extra)
1808             return PyList_New(0);
1809 
1810         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1811             return NULL;
1812         }
1813         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1814                                          step);
1815 
1816         if (slicelen <= 0)
1817             return PyList_New(0);
1818         else {
1819             list = PyList_New(slicelen);
1820             if (!list)
1821                 return NULL;
1822 
1823             for (cur = start, i = 0; i < slicelen;
1824                  cur += step, i++) {
1825                 PyObject* item = self->extra->children[cur];
1826                 Py_INCREF(item);
1827                 PyList_SET_ITEM(list, i, item);
1828             }
1829 
1830             return list;
1831         }
1832     }
1833     else {
1834         PyErr_SetString(PyExc_TypeError,
1835                 "element indices must be integers");
1836         return NULL;
1837     }
1838 }
1839 
1840 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1841 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1842 {
1843     ElementObject* self = (ElementObject*) self_;
1844 
1845     if (PyIndex_Check(item)) {
1846         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1847 
1848         if (i == -1 && PyErr_Occurred()) {
1849             return -1;
1850         }
1851         if (i < 0 && self->extra)
1852             i += self->extra->length;
1853         return element_setitem(self_, i, value);
1854     }
1855     else if (PySlice_Check(item)) {
1856         Py_ssize_t start, stop, step, slicelen, newlen, i;
1857         size_t cur;
1858 
1859         PyObject* recycle = NULL;
1860         PyObject* seq;
1861 
1862         if (!self->extra) {
1863             if (create_extra(self, NULL) < 0)
1864                 return -1;
1865         }
1866 
1867         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1868             return -1;
1869         }
1870         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1871                                          step);
1872 
1873         if (value == NULL) {
1874             /* Delete slice */
1875             size_t cur;
1876             Py_ssize_t i;
1877 
1878             if (slicelen <= 0)
1879                 return 0;
1880 
1881             /* Since we're deleting, the direction of the range doesn't matter,
1882              * so for simplicity make it always ascending.
1883             */
1884             if (step < 0) {
1885                 stop = start + 1;
1886                 start = stop + step * (slicelen - 1) - 1;
1887                 step = -step;
1888             }
1889 
1890             assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1891 
1892             /* recycle is a list that will contain all the children
1893              * scheduled for removal.
1894             */
1895             if (!(recycle = PyList_New(slicelen))) {
1896                 return -1;
1897             }
1898 
1899             /* This loop walks over all the children that have to be deleted,
1900              * with cur pointing at them. num_moved is the amount of children
1901              * until the next deleted child that have to be "shifted down" to
1902              * occupy the deleted's places.
1903              * Note that in the ith iteration, shifting is done i+i places down
1904              * because i children were already removed.
1905             */
1906             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1907                 /* Compute how many children have to be moved, clipping at the
1908                  * list end.
1909                 */
1910                 Py_ssize_t num_moved = step - 1;
1911                 if (cur + step >= (size_t)self->extra->length) {
1912                     num_moved = self->extra->length - cur - 1;
1913                 }
1914 
1915                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1916 
1917                 memmove(
1918                     self->extra->children + cur - i,
1919                     self->extra->children + cur + 1,
1920                     num_moved * sizeof(PyObject *));
1921             }
1922 
1923             /* Leftover "tail" after the last removed child */
1924             cur = start + (size_t)slicelen * step;
1925             if (cur < (size_t)self->extra->length) {
1926                 memmove(
1927                     self->extra->children + cur - slicelen,
1928                     self->extra->children + cur,
1929                     (self->extra->length - cur) * sizeof(PyObject *));
1930             }
1931 
1932             self->extra->length -= slicelen;
1933 
1934             /* Discard the recycle list with all the deleted sub-elements */
1935             Py_DECREF(recycle);
1936             return 0;
1937         }
1938 
1939         /* A new slice is actually being assigned */
1940         seq = PySequence_Fast(value, "");
1941         if (!seq) {
1942             PyErr_Format(
1943                 PyExc_TypeError,
1944                 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1945                 );
1946             return -1;
1947         }
1948         newlen = PySequence_Fast_GET_SIZE(seq);
1949 
1950         if (step !=  1 && newlen != slicelen)
1951         {
1952             Py_DECREF(seq);
1953             PyErr_Format(PyExc_ValueError,
1954                 "attempt to assign sequence of size %zd "
1955                 "to extended slice of size %zd",
1956                 newlen, slicelen
1957                 );
1958             return -1;
1959         }
1960 
1961         /* Resize before creating the recycle bin, to prevent refleaks. */
1962         if (newlen > slicelen) {
1963             if (element_resize(self, newlen - slicelen) < 0) {
1964                 Py_DECREF(seq);
1965                 return -1;
1966             }
1967         }
1968 
1969         for (i = 0; i < newlen; i++) {
1970             PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1971             if (!Element_Check(element)) {
1972                 raise_type_error(element);
1973                 Py_DECREF(seq);
1974                 return -1;
1975             }
1976         }
1977 
1978         if (slicelen > 0) {
1979             /* to avoid recursive calls to this method (via decref), move
1980                old items to the recycle bin here, and get rid of them when
1981                we're done modifying the element */
1982             recycle = PyList_New(slicelen);
1983             if (!recycle) {
1984                 Py_DECREF(seq);
1985                 return -1;
1986             }
1987             for (cur = start, i = 0; i < slicelen;
1988                  cur += step, i++)
1989                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1990         }
1991 
1992         if (newlen < slicelen) {
1993             /* delete slice */
1994             for (i = stop; i < self->extra->length; i++)
1995                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1996         } else if (newlen > slicelen) {
1997             /* insert slice */
1998             for (i = self->extra->length-1; i >= stop; i--)
1999                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2000         }
2001 
2002         /* replace the slice */
2003         for (cur = start, i = 0; i < newlen;
2004              cur += step, i++) {
2005             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2006             Py_INCREF(element);
2007             self->extra->children[cur] = element;
2008         }
2009 
2010         self->extra->length += newlen - slicelen;
2011 
2012         Py_DECREF(seq);
2013 
2014         /* discard the recycle bin, and everything in it */
2015         Py_XDECREF(recycle);
2016 
2017         return 0;
2018     }
2019     else {
2020         PyErr_SetString(PyExc_TypeError,
2021                 "element indices must be integers");
2022         return -1;
2023     }
2024 }
2025 
2026 static PyObject*
element_tag_getter(ElementObject * self,void * closure)2027 element_tag_getter(ElementObject *self, void *closure)
2028 {
2029     PyObject *res = self->tag;
2030     Py_INCREF(res);
2031     return res;
2032 }
2033 
2034 static PyObject*
element_text_getter(ElementObject * self,void * closure)2035 element_text_getter(ElementObject *self, void *closure)
2036 {
2037     PyObject *res = element_get_text(self);
2038     Py_XINCREF(res);
2039     return res;
2040 }
2041 
2042 static PyObject*
element_tail_getter(ElementObject * self,void * closure)2043 element_tail_getter(ElementObject *self, void *closure)
2044 {
2045     PyObject *res = element_get_tail(self);
2046     Py_XINCREF(res);
2047     return res;
2048 }
2049 
2050 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)2051 element_attrib_getter(ElementObject *self, void *closure)
2052 {
2053     PyObject *res;
2054     if (!self->extra) {
2055         if (create_extra(self, NULL) < 0)
2056             return NULL;
2057     }
2058     res = element_get_attrib(self);
2059     Py_XINCREF(res);
2060     return res;
2061 }
2062 
/* Macro for setter validation.
 *
 * A setter is called with a NULL value when the attribute is being
 * deleted; that is not supported, so raise AttributeError and make the
 * enclosing function return -1.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement and is safe inside an unbraced if/else.  It must be followed
 * by a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2071 
2072 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2073 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2074 {
2075     _VALIDATE_ATTR_VALUE(value);
2076     Py_INCREF(value);
2077     Py_SETREF(self->tag, value);
2078     return 0;
2079 }
2080 
2081 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2082 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2083 {
2084     _VALIDATE_ATTR_VALUE(value);
2085     Py_INCREF(value);
2086     _set_joined_ptr(&self->text, value);
2087     return 0;
2088 }
2089 
2090 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2091 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2092 {
2093     _VALIDATE_ATTR_VALUE(value);
2094     Py_INCREF(value);
2095     _set_joined_ptr(&self->tail, value);
2096     return 0;
2097 }
2098 
2099 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2100 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2101 {
2102     _VALIDATE_ATTR_VALUE(value);
2103     if (!self->extra) {
2104         if (create_extra(self, NULL) < 0)
2105             return -1;
2106     }
2107     Py_INCREF(value);
2108     Py_SETREF(self->extra->attrib, value);
2109     return 0;
2110 }
2111 
2112 static PySequenceMethods element_as_sequence = {
2113     (lenfunc) element_length,
2114     0, /* sq_concat */
2115     0, /* sq_repeat */
2116     element_getitem,
2117     0,
2118     element_setitem,
2119     0,
2120 };
2121 
2122 /******************************* Element iterator ****************************/
2123 
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack entry owns a reference to it */
    Py_ssize_t child_index;     /* index of the next child of 'parent' to
                                   visit */
} ParentLocator;
2136 
/* Iterator object shared by the iter() and itertext() traversals (see
   elementiter_next). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* PyMem-allocated array of stack items */
    Py_ssize_t parent_stack_used;   /* number of items currently on the stack */
    Py_ssize_t parent_stack_size;   /* allocated capacity of parent_stack */
    ElementObject *root_element;    /* element iteration starts from; set to
                                       NULL once it has been consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* nonzero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2146 
2147 
2148 static void
elementiter_dealloc(ElementIterObject * it)2149 elementiter_dealloc(ElementIterObject *it)
2150 {
2151     Py_ssize_t i = it->parent_stack_used;
2152     it->parent_stack_used = 0;
2153     /* bpo-31095: UnTrack is needed before calling any callbacks */
2154     PyObject_GC_UnTrack(it);
2155     while (i--)
2156         Py_XDECREF(it->parent_stack[i].parent);
2157     PyMem_Free(it->parent_stack);
2158 
2159     Py_XDECREF(it->sought_tag);
2160     Py_XDECREF(it->root_element);
2161 
2162     PyObject_GC_Del(it);
2163 }
2164 
2165 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2166 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2167 {
2168     Py_ssize_t i = it->parent_stack_used;
2169     while (i--)
2170         Py_VISIT(it->parent_stack[i].parent);
2171 
2172     Py_VISIT(it->root_element);
2173     Py_VISIT(it->sought_tag);
2174     return 0;
2175 }
2176 
2177 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2178  */
2179 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2180 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2181 {
2182     ParentLocator *item;
2183 
2184     if (it->parent_stack_used >= it->parent_stack_size) {
2185         Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2186         ParentLocator *parent_stack = it->parent_stack;
2187         PyMem_Resize(parent_stack, ParentLocator, new_size);
2188         if (parent_stack == NULL)
2189             return -1;
2190         it->parent_stack = parent_stack;
2191         it->parent_stack_size = new_size;
2192     }
2193     item = it->parent_stack + it->parent_stack_used++;
2194     Py_INCREF(parent);
2195     item->parent = parent;
2196     item->child_index = 0;
2197     return 0;
2198 }
2199 
/* tp_iternext implementation for the element iterator.
 *
 * Returns a new reference to the next matching element (or, in gettext
 * mode, to the next non-empty text/tail object).  Returns NULL with
 * StopIteration set when the traversal is exhausted, or NULL with another
 * exception set on error.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: from here on 'elem' carries the reference that the
                 * popped stack entry used to own. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* Own a reference to the child for the rest of this iteration. */
            Py_INCREF(elem);
        }

        /* The stack takes its own reference to elem; ours is either returned
         * to the caller or dropped below. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* A sought tag of None matches every element. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Shared tail of both gettext paths: 'text' is the object obtained
         * from elem (no reference owned yet), and we still own a reference
         * to elem. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take a reference to text before releasing elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            /* Only truthy (non-empty) text is yielded. */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2305 
2306 
2307 static PyTypeObject ElementIter_Type = {
2308     PyVarObject_HEAD_INIT(NULL, 0)
2309     /* Using the module's name since the pure-Python implementation does not
2310        have such a type. */
2311     "_elementtree._element_iterator",           /* tp_name */
2312     sizeof(ElementIterObject),                  /* tp_basicsize */
2313     0,                                          /* tp_itemsize */
2314     /* methods */
2315     (destructor)elementiter_dealloc,            /* tp_dealloc */
2316     0,                                          /* tp_vectorcall_offset */
2317     0,                                          /* tp_getattr */
2318     0,                                          /* tp_setattr */
2319     0,                                          /* tp_as_async */
2320     0,                                          /* tp_repr */
2321     0,                                          /* tp_as_number */
2322     0,                                          /* tp_as_sequence */
2323     0,                                          /* tp_as_mapping */
2324     0,                                          /* tp_hash */
2325     0,                                          /* tp_call */
2326     0,                                          /* tp_str */
2327     0,                                          /* tp_getattro */
2328     0,                                          /* tp_setattro */
2329     0,                                          /* tp_as_buffer */
2330     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
2331     0,                                          /* tp_doc */
2332     (traverseproc)elementiter_traverse,         /* tp_traverse */
2333     0,                                          /* tp_clear */
2334     0,                                          /* tp_richcompare */
2335     0,                                          /* tp_weaklistoffset */
2336     PyObject_SelfIter,                          /* tp_iter */
2337     (iternextfunc)elementiter_next,             /* tp_iternext */
2338     0,                                          /* tp_methods */
2339     0,                                          /* tp_members */
2340     0,                                          /* tp_getset */
2341     0,                                          /* tp_base */
2342     0,                                          /* tp_dict */
2343     0,                                          /* tp_descr_get */
2344     0,                                          /* tp_descr_set */
2345     0,                                          /* tp_dictoffset */
2346     0,                                          /* tp_init */
2347     0,                                          /* tp_alloc */
2348     0,                                          /* tp_new */
2349 };
2350 
2351 #define INIT_PARENT_STACK_SIZE 8
2352 
2353 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2354 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2355 {
2356     ElementIterObject *it;
2357 
2358     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2359     if (!it)
2360         return NULL;
2361 
2362     Py_INCREF(tag);
2363     it->sought_tag = tag;
2364     it->gettext = gettext;
2365     Py_INCREF(self);
2366     it->root_element = self;
2367 
2368     PyObject_GC_Track(it);
2369 
2370     it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2371     if (it->parent_stack == NULL) {
2372         Py_DECREF(it);
2373         PyErr_NoMemory();
2374         return NULL;
2375     }
2376     it->parent_stack_used = 0;
2377     it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2378 
2379     return (PyObject *)it;
2380 }
2381 
2382 
2383 /* ==================================================================== */
2384 /* the tree builder type */
2385 
/* State of a TreeBuilder instance.  All PyObject* members are owned
   references (or NULL); they are visited by treebuilder_gc_traverse() and
   released by treebuilder_gc_clear(). */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Factories; element_factory is NULL when the built-in element
       constructor is to be used (see treebuilder_handle_start). */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* Flags taken from the constructor arguments of the same name; forced
       to 0 when the corresponding factory is unset.  Presumably they control
       whether comments / PIs are inserted into the tree -- confirm against
       the comment and PI handlers. */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True when 'op' is exactly of type TreeBuilder_Type (subclasses do not
   match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
2418 
2419 /* -------------------------------------------------------------------- */
2420 /* constructor and destructor */
2421 
2422 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2423 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2424 {
2425     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2426     if (t != NULL) {
2427         t->root = NULL;
2428 
2429         Py_INCREF(Py_None);
2430         t->this = Py_None;
2431         Py_INCREF(Py_None);
2432         t->last = Py_None;
2433 
2434         t->data = NULL;
2435         t->element_factory = NULL;
2436         t->comment_factory = NULL;
2437         t->pi_factory = NULL;
2438         t->stack = PyList_New(20);
2439         if (!t->stack) {
2440             Py_DECREF(t->this);
2441             Py_DECREF(t->last);
2442             Py_DECREF((PyObject *) t);
2443             return NULL;
2444         }
2445         t->index = 0;
2446 
2447         t->events_append = NULL;
2448         t->start_event_obj = t->end_event_obj = NULL;
2449         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2450         t->comment_event_obj = t->pi_event_obj = NULL;
2451         t->insert_comments = t->insert_pis = 0;
2452     }
2453     return (PyObject *)t;
2454 }
2455 
2456 /*[clinic input]
2457 _elementtree.TreeBuilder.__init__
2458 
2459     element_factory: object = None
2460     *
2461     comment_factory: object = None
2462     pi_factory: object = None
2463     insert_comments: bool = False
2464     insert_pis: bool = False
2465 
2466 [clinic start generated code]*/
2467 
2468 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2469 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2470                                        PyObject *element_factory,
2471                                        PyObject *comment_factory,
2472                                        PyObject *pi_factory,
2473                                        int insert_comments, int insert_pis)
2474 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2475 {
2476     if (element_factory != Py_None) {
2477         Py_INCREF(element_factory);
2478         Py_XSETREF(self->element_factory, element_factory);
2479     } else {
2480         Py_CLEAR(self->element_factory);
2481     }
2482 
2483     if (comment_factory == Py_None) {
2484         elementtreestate *st = ET_STATE_GLOBAL;
2485         comment_factory = st->comment_factory;
2486     }
2487     if (comment_factory) {
2488         Py_INCREF(comment_factory);
2489         Py_XSETREF(self->comment_factory, comment_factory);
2490         self->insert_comments = insert_comments;
2491     } else {
2492         Py_CLEAR(self->comment_factory);
2493         self->insert_comments = 0;
2494     }
2495 
2496     if (pi_factory == Py_None) {
2497         elementtreestate *st = ET_STATE_GLOBAL;
2498         pi_factory = st->pi_factory;
2499     }
2500     if (pi_factory) {
2501         Py_INCREF(pi_factory);
2502         Py_XSETREF(self->pi_factory, pi_factory);
2503         self->insert_pis = insert_pis;
2504     } else {
2505         Py_CLEAR(self->pi_factory);
2506         self->insert_pis = 0;
2507     }
2508 
2509     return 0;
2510 }
2511 
2512 static int
treebuilder_gc_traverse(TreeBuilderObject * self,visitproc visit,void * arg)2513 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2514 {
2515     Py_VISIT(self->pi_event_obj);
2516     Py_VISIT(self->comment_event_obj);
2517     Py_VISIT(self->end_ns_event_obj);
2518     Py_VISIT(self->start_ns_event_obj);
2519     Py_VISIT(self->end_event_obj);
2520     Py_VISIT(self->start_event_obj);
2521     Py_VISIT(self->events_append);
2522     Py_VISIT(self->root);
2523     Py_VISIT(self->this);
2524     Py_VISIT(self->last);
2525     Py_VISIT(self->last_for_tail);
2526     Py_VISIT(self->data);
2527     Py_VISIT(self->stack);
2528     Py_VISIT(self->pi_factory);
2529     Py_VISIT(self->comment_factory);
2530     Py_VISIT(self->element_factory);
2531     return 0;
2532 }
2533 
2534 static int
treebuilder_gc_clear(TreeBuilderObject * self)2535 treebuilder_gc_clear(TreeBuilderObject *self)
2536 {
2537     Py_CLEAR(self->pi_event_obj);
2538     Py_CLEAR(self->comment_event_obj);
2539     Py_CLEAR(self->end_ns_event_obj);
2540     Py_CLEAR(self->start_ns_event_obj);
2541     Py_CLEAR(self->end_event_obj);
2542     Py_CLEAR(self->start_event_obj);
2543     Py_CLEAR(self->events_append);
2544     Py_CLEAR(self->stack);
2545     Py_CLEAR(self->data);
2546     Py_CLEAR(self->last);
2547     Py_CLEAR(self->last_for_tail);
2548     Py_CLEAR(self->this);
2549     Py_CLEAR(self->pi_factory);
2550     Py_CLEAR(self->comment_factory);
2551     Py_CLEAR(self->element_factory);
2552     Py_CLEAR(self->root);
2553     return 0;
2554 }
2555 
2556 static void
treebuilder_dealloc(TreeBuilderObject * self)2557 treebuilder_dealloc(TreeBuilderObject *self)
2558 {
2559     PyObject_GC_UnTrack(self);
2560     treebuilder_gc_clear(self);
2561     Py_TYPE(self)->tp_free((PyObject *)self);
2562 }
2563 
2564 /* -------------------------------------------------------------------- */
2565 /* helpers for handling of arbitrary element-like objects */
2566 
2567 /*[clinic input]
2568 _elementtree._set_factories
2569 
2570     comment_factory: object
2571     pi_factory: object
2572     /
2573 
2574 Change the factories used to create comments and processing instructions.
2575 
2576 For internal use only.
2577 [clinic start generated code]*/
2578 
2579 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2580 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2581                                  PyObject *pi_factory)
2582 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2583 {
2584     elementtreestate *st = ET_STATE_GLOBAL;
2585     PyObject *old;
2586 
2587     if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2588         PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2589                      Py_TYPE(comment_factory)->tp_name);
2590         return NULL;
2591     }
2592     if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2593         PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2594                      Py_TYPE(pi_factory)->tp_name);
2595         return NULL;
2596     }
2597 
2598     old = PyTuple_Pack(2,
2599         st->comment_factory ? st->comment_factory : Py_None,
2600         st->pi_factory ? st->pi_factory : Py_None);
2601 
2602     if (comment_factory == Py_None) {
2603         Py_CLEAR(st->comment_factory);
2604     } else {
2605         Py_INCREF(comment_factory);
2606         Py_XSETREF(st->comment_factory, comment_factory);
2607     }
2608     if (pi_factory == Py_None) {
2609         Py_CLEAR(st->pi_factory);
2610     } else {
2611         Py_INCREF(pi_factory);
2612         Py_XSETREF(st->pi_factory, pi_factory);
2613     }
2614 
2615     return old;
2616 }
2617 
2618 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2619 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2620                                         PyObject **dest, _Py_Identifier *name)
2621 {
2622     /* Fast paths for the "almost always" cases. */
2623     if (Element_CheckExact(element)) {
2624         PyObject *dest_obj = JOIN_OBJ(*dest);
2625         if (dest_obj == Py_None) {
2626             *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2627             *data = NULL;
2628             Py_DECREF(dest_obj);
2629             return 0;
2630         }
2631         else if (JOIN_GET(*dest)) {
2632             if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2633                 return -1;
2634             }
2635             Py_CLEAR(*data);
2636             return 0;
2637         }
2638     }
2639 
2640     /*  Fallback for the non-Element / non-trivial cases. */
2641     {
2642         int r;
2643         PyObject* joined;
2644         PyObject* previous = _PyObject_GetAttrId(element, name);
2645         if (!previous)
2646             return -1;
2647         joined = list_join(*data);
2648         if (!joined) {
2649             Py_DECREF(previous);
2650             return -1;
2651         }
2652         if (previous != Py_None) {
2653             PyObject *tmp = PyNumber_Add(previous, joined);
2654             Py_DECREF(joined);
2655             Py_DECREF(previous);
2656             if (!tmp)
2657                 return -1;
2658             joined = tmp;
2659         } else {
2660             Py_DECREF(previous);
2661         }
2662 
2663         r = _PyObject_SetAttrId(element, name, joined);
2664         Py_DECREF(joined);
2665         if (r < 0)
2666             return -1;
2667         Py_CLEAR(*data);
2668         return 0;
2669     }
2670 }
2671 
2672 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2673 treebuilder_flush_data(TreeBuilderObject* self)
2674 {
2675     if (!self->data) {
2676         return 0;
2677     }
2678 
2679     if (!self->last_for_tail) {
2680         PyObject *element = self->last;
2681         _Py_IDENTIFIER(text);
2682         return treebuilder_extend_element_text_or_tail(
2683                 element, &self->data,
2684                 &((ElementObject *) element)->text, &PyId_text);
2685     }
2686     else {
2687         PyObject *element = self->last_for_tail;
2688         _Py_IDENTIFIER(tail);
2689         return treebuilder_extend_element_text_or_tail(
2690                 element, &self->data,
2691                 &((ElementObject *) element)->tail, &PyId_tail);
2692     }
2693 }
2694 
2695 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2696 treebuilder_add_subelement(PyObject *element, PyObject *child)
2697 {
2698     _Py_IDENTIFIER(append);
2699     if (Element_CheckExact(element)) {
2700         ElementObject *elem = (ElementObject *) element;
2701         return element_add_subelement(elem, child);
2702     }
2703     else {
2704         PyObject *res;
2705         res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
2706         if (res == NULL)
2707             return -1;
2708         Py_DECREF(res);
2709         return 0;
2710     }
2711 }
2712 
2713 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2714 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2715                          PyObject *node)
2716 {
2717     if (action != NULL) {
2718         PyObject *res;
2719         PyObject *event = PyTuple_Pack(2, action, node);
2720         if (event == NULL)
2721             return -1;
2722         res = _PyObject_FastCall(self->events_append, &event, 1);
2723         Py_DECREF(event);
2724         if (res == NULL)
2725             return -1;
2726         Py_DECREF(res);
2727     }
2728     return 0;
2729 }
2730 
2731 /* -------------------------------------------------------------------- */
2732 /* handlers */
2733 
2734 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2735 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2736                          PyObject* attrib)
2737 {
2738     PyObject* node;
2739     PyObject* this;
2740     elementtreestate *st = ET_STATE_GLOBAL;
2741 
2742     if (treebuilder_flush_data(self) < 0) {
2743         return NULL;
2744     }
2745 
2746     if (!self->element_factory) {
2747         node = create_new_element(tag, attrib);
2748     } else if (attrib == Py_None) {
2749         attrib = PyDict_New();
2750         if (!attrib)
2751             return NULL;
2752         node = PyObject_CallFunctionObjArgs(self->element_factory,
2753                                             tag, attrib, NULL);
2754         Py_DECREF(attrib);
2755     }
2756     else {
2757         node = PyObject_CallFunctionObjArgs(self->element_factory,
2758                                             tag, attrib, NULL);
2759     }
2760     if (!node) {
2761         return NULL;
2762     }
2763 
2764     this = self->this;
2765     Py_CLEAR(self->last_for_tail);
2766 
2767     if (this != Py_None) {
2768         if (treebuilder_add_subelement(this, node) < 0)
2769             goto error;
2770     } else {
2771         if (self->root) {
2772             PyErr_SetString(
2773                 st->parseerror_obj,
2774                 "multiple elements on top level"
2775                 );
2776             goto error;
2777         }
2778         Py_INCREF(node);
2779         self->root = node;
2780     }
2781 
2782     if (self->index < PyList_GET_SIZE(self->stack)) {
2783         if (PyList_SetItem(self->stack, self->index, this) < 0)
2784             goto error;
2785         Py_INCREF(this);
2786     } else {
2787         if (PyList_Append(self->stack, this) < 0)
2788             goto error;
2789     }
2790     self->index++;
2791 
2792     Py_INCREF(node);
2793     Py_SETREF(self->this, node);
2794     Py_INCREF(node);
2795     Py_SETREF(self->last, node);
2796 
2797     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2798         goto error;
2799 
2800     return node;
2801 
2802   error:
2803     Py_DECREF(node);
2804     return NULL;
2805 }
2806 
2807 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2808 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2809 {
2810     if (!self->data) {
2811         if (self->last == Py_None) {
2812             /* ignore calls to data before the first call to start */
2813             Py_RETURN_NONE;
2814         }
2815         /* store the first item as is */
2816         Py_INCREF(data); self->data = data;
2817     } else {
2818         /* more than one item; use a list to collect items */
2819         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2820             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2821             /* XXX this code path unused in Python 3? */
2822             /* expat often generates single character data sections; handle
2823                the most common case by resizing the existing string... */
2824             Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2825             if (_PyBytes_Resize(&self->data, size + 1) < 0)
2826                 return NULL;
2827             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2828         } else if (PyList_CheckExact(self->data)) {
2829             if (PyList_Append(self->data, data) < 0)
2830                 return NULL;
2831         } else {
2832             PyObject* list = PyList_New(2);
2833             if (!list)
2834                 return NULL;
2835             PyList_SET_ITEM(list, 0, self->data);
2836             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2837             self->data = list;
2838         }
2839     }
2840 
2841     Py_RETURN_NONE;
2842 }
2843 
2844 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2845 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2846 {
2847     PyObject* item;
2848 
2849     if (treebuilder_flush_data(self) < 0) {
2850         return NULL;
2851     }
2852 
2853     if (self->index == 0) {
2854         PyErr_SetString(
2855             PyExc_IndexError,
2856             "pop from empty stack"
2857             );
2858         return NULL;
2859     }
2860 
2861     item = self->last;
2862     self->last = self->this;
2863     Py_INCREF(self->last);
2864     Py_XSETREF(self->last_for_tail, self->last);
2865     self->index--;
2866     self->this = PyList_GET_ITEM(self->stack, self->index);
2867     Py_INCREF(self->this);
2868     Py_DECREF(item);
2869 
2870     if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2871         return NULL;
2872 
2873     Py_INCREF(self->last);
2874     return (PyObject*) self->last;
2875 }
2876 
2877 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2878 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2879 {
2880     PyObject* comment;
2881     PyObject* this;
2882 
2883     if (treebuilder_flush_data(self) < 0) {
2884         return NULL;
2885     }
2886 
2887     if (self->comment_factory) {
2888         comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2889         if (!comment)
2890             return NULL;
2891 
2892         this = self->this;
2893         if (self->insert_comments && this != Py_None) {
2894             if (treebuilder_add_subelement(this, comment) < 0)
2895                 goto error;
2896             Py_INCREF(comment);
2897             Py_XSETREF(self->last_for_tail, comment);
2898         }
2899     } else {
2900         Py_INCREF(text);
2901         comment = text;
2902     }
2903 
2904     if (self->events_append && self->comment_event_obj) {
2905         if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2906             goto error;
2907     }
2908 
2909     return comment;
2910 
2911   error:
2912     Py_DECREF(comment);
2913     return NULL;
2914 }
2915 
2916 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2917 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2918 {
2919     PyObject* pi;
2920     PyObject* this;
2921     PyObject* stack[2] = {target, text};
2922 
2923     if (treebuilder_flush_data(self) < 0) {
2924         return NULL;
2925     }
2926 
2927     if (self->pi_factory) {
2928         pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2929         if (!pi) {
2930             return NULL;
2931         }
2932 
2933         this = self->this;
2934         if (self->insert_pis && this != Py_None) {
2935             if (treebuilder_add_subelement(this, pi) < 0)
2936                 goto error;
2937             Py_INCREF(pi);
2938             Py_XSETREF(self->last_for_tail, pi);
2939         }
2940     } else {
2941         pi = PyTuple_Pack(2, target, text);
2942         if (!pi) {
2943             return NULL;
2944         }
2945     }
2946 
2947     if (self->events_append && self->pi_event_obj) {
2948         if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2949             goto error;
2950     }
2951 
2952     return pi;
2953 
2954   error:
2955     Py_DECREF(pi);
2956     return NULL;
2957 }
2958 
2959 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2960 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2961 {
2962     PyObject* parcel;
2963 
2964     if (self->events_append && self->start_ns_event_obj) {
2965         parcel = PyTuple_Pack(2, prefix, uri);
2966         if (!parcel) {
2967             return NULL;
2968         }
2969 
2970         if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2971             Py_DECREF(parcel);
2972             return NULL;
2973         }
2974         Py_DECREF(parcel);
2975     }
2976 
2977     Py_RETURN_NONE;
2978 }
2979 
2980 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2981 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2982 {
2983     if (self->events_append && self->end_ns_event_obj) {
2984         if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2985             return NULL;
2986         }
2987     }
2988 
2989     Py_RETURN_NONE;
2990 }
2991 
2992 /* -------------------------------------------------------------------- */
2993 /* methods (in alphabetical order) */
2994 
2995 /*[clinic input]
2996 _elementtree.TreeBuilder.data
2997 
2998     data: object
2999     /
3000 
3001 [clinic start generated code]*/
3002 
3003 static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject * self,PyObject * data)3004 _elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
3005 /*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
3006 {
3007     return treebuilder_handle_data(self, data);
3008 }
3009 
3010 /*[clinic input]
3011 _elementtree.TreeBuilder.end
3012 
3013     tag: object
3014     /
3015 
3016 [clinic start generated code]*/
3017 
3018 static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject * self,PyObject * tag)3019 _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
3020 /*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
3021 {
3022     return treebuilder_handle_end(self, tag);
3023 }
3024 
3025 /*[clinic input]
3026 _elementtree.TreeBuilder.comment
3027 
3028     text: object
3029     /
3030 
3031 [clinic start generated code]*/
3032 
3033 static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject * self,PyObject * text)3034 _elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
3035 /*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
3036 {
3037     return treebuilder_handle_comment(self, text);
3038 }
3039 
3040 /*[clinic input]
3041 _elementtree.TreeBuilder.pi
3042 
3043     target: object
3044     text: object = None
3045     /
3046 
3047 [clinic start generated code]*/
3048 
3049 static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject * self,PyObject * target,PyObject * text)3050 _elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
3051                                  PyObject *text)
3052 /*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
3053 {
3054     return treebuilder_handle_pi(self, target, text);
3055 }
3056 
3057 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)3058 treebuilder_done(TreeBuilderObject* self)
3059 {
3060     PyObject* res;
3061 
3062     /* FIXME: check stack size? */
3063 
3064     if (self->root)
3065         res = self->root;
3066     else
3067         res = Py_None;
3068 
3069     Py_INCREF(res);
3070     return res;
3071 }
3072 
3073 /*[clinic input]
3074 _elementtree.TreeBuilder.close
3075 
3076 [clinic start generated code]*/
3077 
3078 static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject * self)3079 _elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3080 /*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3081 {
3082     return treebuilder_done(self);
3083 }
3084 
3085 /*[clinic input]
3086 _elementtree.TreeBuilder.start
3087 
3088     tag: object
3089     attrs: object = None
3090     /
3091 
3092 [clinic start generated code]*/
3093 
3094 static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject * self,PyObject * tag,PyObject * attrs)3095 _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3096                                     PyObject *attrs)
3097 /*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
3098 {
3099     return treebuilder_handle_start(self, tag, attrs);
3100 }
3101 
3102 /* ==================================================================== */
3103 /* the expat interface */
3104 
3105 #include "expat.h"
3106 #include "pyexpat.h"
3107 
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Shorthand for reaching a member of the cached table, e.g.
 * EXPAT(ParserCreate_MM)(...).  The macro dereferences expat_capi, so the
 * pointer must have been assigned first (the assignment is not in this
 * part of the file).
 */
#define EXPAT(func) (expat_capi->func)

/* Allocation callbacks passed to ParserCreate_MM in XMLParser.__init__;
 * they point the parser at PyObject_Malloc/PyObject_Realloc/PyObject_Free.
 */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3116 
/* Instance data of an XMLParser object.  All members start out NULL (see
 * xmlparser_new) and are filled in by XMLParser.__init__.
 */
typedef struct {
    PyObject_HEAD

    /* expat parser handle created with ParserCreate_MM */
    XML_Parser parser;

    /* object the parse callbacks are delivered to */
    PyObject *target;
    /* dict searched by expat_default_handler for the name of an "&name;"
       reference */
    PyObject *entity;

    /* dict used by makeuniversal as a cache: raw name (bytes) -> decoded
       name (str) */
    PyObject *names;

    /* Attributes fetched from the target in __init__ ("start_ns",
       "end_ns", "start", "data", "end").  A member stays NULL when the
       target has no such attribute. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* likewise for "comment", "pi" and "doctype" */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* likewise for "close" */
    PyObject *handle_close;

} XMLParserObject;
3140 
3141 /* helpers */
3142 
3143 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3144 makeuniversal(XMLParserObject* self, const char* string)
3145 {
3146     /* convert a UTF-8 tag/attribute name from the expat parser
3147        to a universal name string */
3148 
3149     Py_ssize_t size = (Py_ssize_t) strlen(string);
3150     PyObject* key;
3151     PyObject* value;
3152 
3153     /* look the 'raw' name up in the names dictionary */
3154     key = PyBytes_FromStringAndSize(string, size);
3155     if (!key)
3156         return NULL;
3157 
3158     value = PyDict_GetItemWithError(self->names, key);
3159 
3160     if (value) {
3161         Py_INCREF(value);
3162     }
3163     else if (!PyErr_Occurred()) {
3164         /* new name.  convert to universal name, and decode as
3165            necessary */
3166 
3167         PyObject* tag;
3168         char* p;
3169         Py_ssize_t i;
3170 
3171         /* look for namespace separator */
3172         for (i = 0; i < size; i++)
3173             if (string[i] == '}')
3174                 break;
3175         if (i != size) {
3176             /* convert to universal name */
3177             tag = PyBytes_FromStringAndSize(NULL, size+1);
3178             if (tag == NULL) {
3179                 Py_DECREF(key);
3180                 return NULL;
3181             }
3182             p = PyBytes_AS_STRING(tag);
3183             p[0] = '{';
3184             memcpy(p+1, string, size);
3185             size++;
3186         } else {
3187             /* plain name; use key as tag */
3188             Py_INCREF(key);
3189             tag = key;
3190         }
3191 
3192         /* decode universal name */
3193         p = PyBytes_AS_STRING(tag);
3194         value = PyUnicode_DecodeUTF8(p, size, "strict");
3195         Py_DECREF(tag);
3196         if (!value) {
3197             Py_DECREF(key);
3198             return NULL;
3199         }
3200 
3201         /* add to names dictionary */
3202         if (PyDict_SetItem(self->names, key, value) < 0) {
3203             Py_DECREF(key);
3204             Py_DECREF(value);
3205             return NULL;
3206         }
3207     }
3208 
3209     Py_DECREF(key);
3210     return value;
3211 }
3212 
3213 /* Set the ParseError exception with the given parameters.
3214  * If message is not NULL, it's used as the error string. Otherwise, the
3215  * message string is the default for the given error_code.
3216 */
3217 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3218 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3219                 const char *message)
3220 {
3221     PyObject *errmsg, *error, *position, *code;
3222     elementtreestate *st = ET_STATE_GLOBAL;
3223 
3224     errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3225                 message ? message : EXPAT(ErrorString)(error_code),
3226                 line, column);
3227     if (errmsg == NULL)
3228         return;
3229 
3230     error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
3231     Py_DECREF(errmsg);
3232     if (!error)
3233         return;
3234 
3235     /* Add code and position attributes */
3236     code = PyLong_FromLong((long)error_code);
3237     if (!code) {
3238         Py_DECREF(error);
3239         return;
3240     }
3241     if (PyObject_SetAttrString(error, "code", code) == -1) {
3242         Py_DECREF(error);
3243         Py_DECREF(code);
3244         return;
3245     }
3246     Py_DECREF(code);
3247 
3248     position = Py_BuildValue("(nn)", line, column);
3249     if (!position) {
3250         Py_DECREF(error);
3251         return;
3252     }
3253     if (PyObject_SetAttrString(error, "position", position) == -1) {
3254         Py_DECREF(error);
3255         Py_DECREF(position);
3256         return;
3257     }
3258     Py_DECREF(position);
3259 
3260     PyErr_SetObject(st->parseerror_obj, error);
3261     Py_DECREF(error);
3262 }
3263 
3264 /* -------------------------------------------------------------------- */
3265 /* handlers */
3266 
3267 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3268 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3269                       int data_len)
3270 {
3271     PyObject* key;
3272     PyObject* value;
3273     PyObject* res;
3274 
3275     if (data_len < 2 || data_in[0] != '&')
3276         return;
3277 
3278     if (PyErr_Occurred())
3279         return;
3280 
3281     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3282     if (!key)
3283         return;
3284 
3285     value = PyDict_GetItemWithError(self->entity, key);
3286 
3287     if (value) {
3288         if (TreeBuilder_CheckExact(self->target))
3289             res = treebuilder_handle_data(
3290                 (TreeBuilderObject*) self->target, value
3291                 );
3292         else if (self->handle_data)
3293             res = _PyObject_FastCall(self->handle_data, &value, 1);
3294         else
3295             res = NULL;
3296         Py_XDECREF(res);
3297     } else if (!PyErr_Occurred()) {
3298         /* Report the first error, not the last */
3299         char message[128] = "undefined entity ";
3300         strncat(message, data_in, data_len < 100?data_len:100);
3301         expat_set_error(
3302             XML_ERROR_UNDEFINED_ENTITY,
3303             EXPAT(GetErrorLineNumber)(self->parser),
3304             EXPAT(GetErrorColumnNumber)(self->parser),
3305             message
3306             );
3307     }
3308 
3309     Py_DECREF(key);
3310 }
3311 
3312 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3313 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3314                     const XML_Char **attrib_in)
3315 {
3316     PyObject* res;
3317     PyObject* tag;
3318     PyObject* attrib;
3319     int ok;
3320 
3321     if (PyErr_Occurred())
3322         return;
3323 
3324     /* tag name */
3325     tag = makeuniversal(self, tag_in);
3326     if (!tag)
3327         return; /* parser will look for errors */
3328 
3329     /* attributes */
3330     if (attrib_in[0]) {
3331         attrib = PyDict_New();
3332         if (!attrib) {
3333             Py_DECREF(tag);
3334             return;
3335         }
3336         while (attrib_in[0] && attrib_in[1]) {
3337             PyObject* key = makeuniversal(self, attrib_in[0]);
3338             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3339             if (!key || !value) {
3340                 Py_XDECREF(value);
3341                 Py_XDECREF(key);
3342                 Py_DECREF(attrib);
3343                 Py_DECREF(tag);
3344                 return;
3345             }
3346             ok = PyDict_SetItem(attrib, key, value);
3347             Py_DECREF(value);
3348             Py_DECREF(key);
3349             if (ok < 0) {
3350                 Py_DECREF(attrib);
3351                 Py_DECREF(tag);
3352                 return;
3353             }
3354             attrib_in += 2;
3355         }
3356     } else {
3357         Py_INCREF(Py_None);
3358         attrib = Py_None;
3359     }
3360 
3361     if (TreeBuilder_CheckExact(self->target)) {
3362         /* shortcut */
3363         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3364                                        tag, attrib);
3365     }
3366     else if (self->handle_start) {
3367         if (attrib == Py_None) {
3368             Py_DECREF(attrib);
3369             attrib = PyDict_New();
3370             if (!attrib) {
3371                 Py_DECREF(tag);
3372                 return;
3373             }
3374         }
3375         res = PyObject_CallFunctionObjArgs(self->handle_start,
3376                                            tag, attrib, NULL);
3377     } else
3378         res = NULL;
3379 
3380     Py_DECREF(tag);
3381     Py_DECREF(attrib);
3382 
3383     Py_XDECREF(res);
3384 }
3385 
3386 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3387 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3388                    int data_len)
3389 {
3390     PyObject* data;
3391     PyObject* res;
3392 
3393     if (PyErr_Occurred())
3394         return;
3395 
3396     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3397     if (!data)
3398         return; /* parser will look for errors */
3399 
3400     if (TreeBuilder_CheckExact(self->target))
3401         /* shortcut */
3402         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3403     else if (self->handle_data)
3404         res = _PyObject_FastCall(self->handle_data, &data, 1);
3405     else
3406         res = NULL;
3407 
3408     Py_DECREF(data);
3409 
3410     Py_XDECREF(res);
3411 }
3412 
3413 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3414 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3415 {
3416     PyObject* tag;
3417     PyObject* res = NULL;
3418 
3419     if (PyErr_Occurred())
3420         return;
3421 
3422     if (TreeBuilder_CheckExact(self->target))
3423         /* shortcut */
3424         /* the standard tree builder doesn't look at the end tag */
3425         res = treebuilder_handle_end(
3426             (TreeBuilderObject*) self->target, Py_None
3427             );
3428     else if (self->handle_end) {
3429         tag = makeuniversal(self, tag_in);
3430         if (tag) {
3431             res = _PyObject_FastCall(self->handle_end, &tag, 1);
3432             Py_DECREF(tag);
3433         }
3434     }
3435 
3436     Py_XDECREF(res);
3437 }
3438 
3439 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3440 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3441                        const XML_Char *uri_in)
3442 {
3443     PyObject* res = NULL;
3444     PyObject* uri;
3445     PyObject* prefix;
3446     PyObject* stack[2];
3447 
3448     if (PyErr_Occurred())
3449         return;
3450 
3451     if (!uri_in)
3452         uri_in = "";
3453     if (!prefix_in)
3454         prefix_in = "";
3455 
3456     if (TreeBuilder_CheckExact(self->target)) {
3457         /* shortcut - TreeBuilder does not actually implement .start_ns() */
3458         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3459 
3460         if (target->events_append && target->start_ns_event_obj) {
3461             prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3462             if (!prefix)
3463                 return;
3464             uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3465             if (!uri) {
3466                 Py_DECREF(prefix);
3467                 return;
3468             }
3469 
3470             res = treebuilder_handle_start_ns(target, prefix, uri);
3471             Py_DECREF(uri);
3472             Py_DECREF(prefix);
3473         }
3474     } else if (self->handle_start_ns) {
3475         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3476         if (!prefix)
3477             return;
3478         uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3479         if (!uri) {
3480             Py_DECREF(prefix);
3481             return;
3482         }
3483 
3484         stack[0] = prefix;
3485         stack[1] = uri;
3486         res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3487         Py_DECREF(uri);
3488         Py_DECREF(prefix);
3489     }
3490 
3491     Py_XDECREF(res);
3492 }
3493 
3494 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3495 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3496 {
3497     PyObject *res = NULL;
3498     PyObject* prefix;
3499 
3500     if (PyErr_Occurred())
3501         return;
3502 
3503     if (!prefix_in)
3504         prefix_in = "";
3505 
3506     if (TreeBuilder_CheckExact(self->target)) {
3507         /* shortcut - TreeBuilder does not actually implement .end_ns() */
3508         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3509 
3510         if (target->events_append && target->end_ns_event_obj) {
3511             res = treebuilder_handle_end_ns(target, Py_None);
3512         }
3513     } else if (self->handle_end_ns) {
3514         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3515         if (!prefix)
3516             return;
3517 
3518         res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3519         Py_DECREF(prefix);
3520     }
3521 
3522     Py_XDECREF(res);
3523 }
3524 
3525 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3526 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3527 {
3528     PyObject* comment;
3529     PyObject* res;
3530 
3531     if (PyErr_Occurred())
3532         return;
3533 
3534     if (TreeBuilder_CheckExact(self->target)) {
3535         /* shortcut */
3536         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3537 
3538         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3539         if (!comment)
3540             return; /* parser will look for errors */
3541 
3542         res = treebuilder_handle_comment(target,  comment);
3543         Py_XDECREF(res);
3544         Py_DECREF(comment);
3545     } else if (self->handle_comment) {
3546         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3547         if (!comment)
3548             return;
3549 
3550         res = _PyObject_FastCall(self->handle_comment, &comment, 1);
3551         Py_XDECREF(res);
3552         Py_DECREF(comment);
3553     }
3554 }
3555 
3556 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3557 expat_start_doctype_handler(XMLParserObject *self,
3558                             const XML_Char *doctype_name,
3559                             const XML_Char *sysid,
3560                             const XML_Char *pubid,
3561                             int has_internal_subset)
3562 {
3563     _Py_IDENTIFIER(doctype);
3564     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3565     PyObject *res;
3566 
3567     if (PyErr_Occurred())
3568         return;
3569 
3570     doctype_name_obj = makeuniversal(self, doctype_name);
3571     if (!doctype_name_obj)
3572         return;
3573 
3574     if (sysid) {
3575         sysid_obj = makeuniversal(self, sysid);
3576         if (!sysid_obj) {
3577             Py_DECREF(doctype_name_obj);
3578             return;
3579         }
3580     } else {
3581         Py_INCREF(Py_None);
3582         sysid_obj = Py_None;
3583     }
3584 
3585     if (pubid) {
3586         pubid_obj = makeuniversal(self, pubid);
3587         if (!pubid_obj) {
3588             Py_DECREF(doctype_name_obj);
3589             Py_DECREF(sysid_obj);
3590             return;
3591         }
3592     } else {
3593         Py_INCREF(Py_None);
3594         pubid_obj = Py_None;
3595     }
3596 
3597     /* If the target has a handler for doctype, call it. */
3598     if (self->handle_doctype) {
3599         res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3600                                            doctype_name_obj, pubid_obj,
3601                                            sysid_obj, NULL);
3602         Py_XDECREF(res);
3603     }
3604     else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3605         (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3606                 "The doctype() method of XMLParser is ignored.  "
3607                 "Define doctype() method on the TreeBuilder target.",
3608                 1);
3609         Py_DECREF(res);
3610     }
3611 
3612     Py_DECREF(doctype_name_obj);
3613     Py_DECREF(pubid_obj);
3614     Py_DECREF(sysid_obj);
3615 }
3616 
3617 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3618 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3619                  const XML_Char* data_in)
3620 {
3621     PyObject* pi_target;
3622     PyObject* data;
3623     PyObject* res;
3624     PyObject* stack[2];
3625 
3626     if (PyErr_Occurred())
3627         return;
3628 
3629     if (TreeBuilder_CheckExact(self->target)) {
3630         /* shortcut */
3631         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3632 
3633         if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3634             pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3635             if (!pi_target)
3636                 goto error;
3637             data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3638             if (!data)
3639                 goto error;
3640             res = treebuilder_handle_pi(target, pi_target, data);
3641             Py_XDECREF(res);
3642             Py_DECREF(data);
3643             Py_DECREF(pi_target);
3644         }
3645     } else if (self->handle_pi) {
3646         pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3647         if (!pi_target)
3648             goto error;
3649         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3650         if (!data)
3651             goto error;
3652 
3653         stack[0] = pi_target;
3654         stack[1] = data;
3655         res = _PyObject_FastCall(self->handle_pi, stack, 2);
3656         Py_XDECREF(res);
3657         Py_DECREF(data);
3658         Py_DECREF(pi_target);
3659     }
3660 
3661     return;
3662 
3663   error:
3664     Py_XDECREF(pi_target);
3665     return;
3666 }
3667 
3668 /* -------------------------------------------------------------------- */
3669 
3670 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3671 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3672 {
3673     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3674     if (self) {
3675         self->parser = NULL;
3676         self->target = self->entity = self->names = NULL;
3677         self->handle_start_ns = self->handle_end_ns = NULL;
3678         self->handle_start = self->handle_data = self->handle_end = NULL;
3679         self->handle_comment = self->handle_pi = self->handle_close = NULL;
3680         self->handle_doctype = NULL;
3681     }
3682     return (PyObject *)self;
3683 }
3684 
3685 static int
ignore_attribute_error(PyObject * value)3686 ignore_attribute_error(PyObject *value)
3687 {
3688     if (value == NULL) {
3689         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3690             return -1;
3691         }
3692         PyErr_Clear();
3693     }
3694     return 0;
3695 }
3696 
3697 /*[clinic input]
3698 _elementtree.XMLParser.__init__
3699 
3700     *
3701     target: object = NULL
3702     encoding: str(accept={str, NoneType}) = None
3703 
3704 [clinic start generated code]*/
3705 
static int
_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
                                     const char *encoding)
/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
{
    /* Initialise an XMLParser: create the entity and names dicts and the
     * expat parser, pick the target (a new TreeBuilder when none is
     * given), look up the target's optional handler attributes, and
     * register the expat callbacks.
     *
     * Returns 0 on success, -1 with an exception set on failure.  On the
     * later failure paths the members assigned so far are left in place.
     */
    self->entity = PyDict_New();
    if (!self->entity)
        return -1;

    self->names = PyDict_New();
    if (!self->names) {
        Py_CLEAR(self->entity);
        return -1;
    }

    /* "}" is the separator expat puts between namespace URI and local
       name; makeuniversal() relies on it */
    self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
    if (!self->parser) {
        Py_CLEAR(self->entity);
        Py_CLEAR(self->names);
        PyErr_NoMemory();
        return -1;
    }
    /* expat < 2.1.0 has no XML_SetHashSalt() */
    if (EXPAT(SetHashSalt) != NULL) {
        EXPAT(SetHashSalt)(self->parser,
                           (unsigned long)_Py_HashSecret.expat.hashsalt);
    }

    /* either way we end up owning one reference to the target */
    if (target) {
        Py_INCREF(target);
    } else {
        target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
        if (!target) {
            Py_CLEAR(self->entity);
            Py_CLEAR(self->names);
            return -1;
        }
    }
    self->target = target;

    /* Every handler is optional: a missing attribute leaves the member
       NULL (the AttributeError is cleared by ignore_attribute_error),
       while any other lookup error aborts the initialisation. */
    self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
    if (ignore_attribute_error(self->handle_start_ns)) {
        return -1;
    }
    self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
    if (ignore_attribute_error(self->handle_end_ns)) {
        return -1;
    }
    self->handle_start = PyObject_GetAttrString(target, "start");
    if (ignore_attribute_error(self->handle_start)) {
        return -1;
    }
    self->handle_data = PyObject_GetAttrString(target, "data");
    if (ignore_attribute_error(self->handle_data)) {
        return -1;
    }
    self->handle_end = PyObject_GetAttrString(target, "end");
    if (ignore_attribute_error(self->handle_end)) {
        return -1;
    }
    self->handle_comment = PyObject_GetAttrString(target, "comment");
    if (ignore_attribute_error(self->handle_comment)) {
        return -1;
    }
    self->handle_pi = PyObject_GetAttrString(target, "pi");
    if (ignore_attribute_error(self->handle_pi)) {
        return -1;
    }
    self->handle_close = PyObject_GetAttrString(target, "close");
    if (ignore_attribute_error(self->handle_close)) {
        return -1;
    }
    self->handle_doctype = PyObject_GetAttrString(target, "doctype");
    if (ignore_attribute_error(self->handle_doctype)) {
        return -1;
    }

    /* configure parser */
    /* the parser object itself is the user data, so each callback gets
       it as its first argument */
    EXPAT(SetUserData)(self->parser, self);
    /* namespace, comment and PI callbacks are only registered when the
       target has the corresponding attribute */
    if (self->handle_start_ns || self->handle_end_ns)
        EXPAT(SetNamespaceDeclHandler)(
            self->parser,
            (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
            (XML_EndNamespaceDeclHandler) expat_end_ns_handler
            );
    EXPAT(SetElementHandler)(
        self->parser,
        (XML_StartElementHandler) expat_start_handler,
        (XML_EndElementHandler) expat_end_handler
        );
    EXPAT(SetDefaultHandlerExpand)(
        self->parser,
        (XML_DefaultHandler) expat_default_handler
        );
    EXPAT(SetCharacterDataHandler)(
        self->parser,
        (XML_CharacterDataHandler) expat_data_handler
        );
    if (self->handle_comment)
        EXPAT(SetCommentHandler)(
            self->parser,
            (XML_CommentHandler) expat_comment_handler
            );
    if (self->handle_pi)
        EXPAT(SetProcessingInstructionHandler)(
            self->parser,
            (XML_ProcessingInstructionHandler) expat_pi_handler
            );
    /* registered unconditionally: the doctype callback itself decides
       whether to call a handler or to warn */
    EXPAT(SetStartDoctypeDeclHandler)(
        self->parser,
        (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
        );
    EXPAT(SetUnknownEncodingHandler)(
        self->parser,
        EXPAT(DefaultUnknownEncodingHandler), NULL
        );

    return 0;
}
3825 
3826 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3827 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3828 {
3829     Py_VISIT(self->handle_close);
3830     Py_VISIT(self->handle_pi);
3831     Py_VISIT(self->handle_comment);
3832     Py_VISIT(self->handle_end);
3833     Py_VISIT(self->handle_data);
3834     Py_VISIT(self->handle_start);
3835     Py_VISIT(self->handle_start_ns);
3836     Py_VISIT(self->handle_end_ns);
3837     Py_VISIT(self->handle_doctype);
3838 
3839     Py_VISIT(self->target);
3840     Py_VISIT(self->entity);
3841     Py_VISIT(self->names);
3842 
3843     return 0;
3844 }
3845 
3846 static int
xmlparser_gc_clear(XMLParserObject * self)3847 xmlparser_gc_clear(XMLParserObject *self)
3848 {
3849     if (self->parser != NULL) {
3850         XML_Parser parser = self->parser;
3851         self->parser = NULL;
3852         EXPAT(ParserFree)(parser);
3853     }
3854 
3855     Py_CLEAR(self->handle_close);
3856     Py_CLEAR(self->handle_pi);
3857     Py_CLEAR(self->handle_comment);
3858     Py_CLEAR(self->handle_end);
3859     Py_CLEAR(self->handle_data);
3860     Py_CLEAR(self->handle_start);
3861     Py_CLEAR(self->handle_start_ns);
3862     Py_CLEAR(self->handle_end_ns);
3863     Py_CLEAR(self->handle_doctype);
3864 
3865     Py_CLEAR(self->target);
3866     Py_CLEAR(self->entity);
3867     Py_CLEAR(self->names);
3868 
3869     return 0;
3870 }
3871 
3872 static void
xmlparser_dealloc(XMLParserObject * self)3873 xmlparser_dealloc(XMLParserObject* self)
3874 {
3875     PyObject_GC_UnTrack(self);
3876     xmlparser_gc_clear(self);
3877     Py_TYPE(self)->tp_free((PyObject *)self);
3878 }
3879 
3880 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3881 _check_xmlparser(XMLParserObject* self)
3882 {
3883     if (self->target == NULL) {
3884         PyErr_SetString(PyExc_ValueError,
3885                         "XMLParser.__init__() wasn't called");
3886         return 0;
3887     }
3888     return 1;
3889 }
3890 
3891 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3892 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3893 {
3894     int ok;
3895 
3896     assert(!PyErr_Occurred());
3897     ok = EXPAT(Parse)(self->parser, data, data_len, final);
3898 
3899     if (PyErr_Occurred())
3900         return NULL;
3901 
3902     if (!ok) {
3903         expat_set_error(
3904             EXPAT(GetErrorCode)(self->parser),
3905             EXPAT(GetErrorLineNumber)(self->parser),
3906             EXPAT(GetErrorColumnNumber)(self->parser),
3907             NULL
3908             );
3909         return NULL;
3910     }
3911 
3912     Py_RETURN_NONE;
3913 }
3914 
3915 /*[clinic input]
3916 _elementtree.XMLParser.close
3917 
3918 [clinic start generated code]*/
3919 
3920 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3921 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3922 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3923 {
3924     /* end feeding data to parser */
3925 
3926     PyObject* res;
3927 
3928     if (!_check_xmlparser(self)) {
3929         return NULL;
3930     }
3931     res = expat_parse(self, "", 0, 1);
3932     if (!res)
3933         return NULL;
3934 
3935     if (TreeBuilder_CheckExact(self->target)) {
3936         Py_DECREF(res);
3937         return treebuilder_done((TreeBuilderObject*) self->target);
3938     }
3939     else if (self->handle_close) {
3940         Py_DECREF(res);
3941         return _PyObject_CallNoArg(self->handle_close);
3942     }
3943     else {
3944         return res;
3945     }
3946 }
3947 
3948 /*[clinic input]
3949 _elementtree.XMLParser.feed
3950 
3951     data: object
3952     /
3953 
3954 [clinic start generated code]*/
3955 
3956 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3957 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3958 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3959 {
3960     /* feed data to parser */
3961 
3962     if (!_check_xmlparser(self)) {
3963         return NULL;
3964     }
3965     if (PyUnicode_Check(data)) {
3966         Py_ssize_t data_len;
3967         const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3968         if (data_ptr == NULL)
3969             return NULL;
3970         if (data_len > INT_MAX) {
3971             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3972             return NULL;
3973         }
3974         /* Explicitly set UTF-8 encoding. Return code ignored. */
3975         (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3976         return expat_parse(self, data_ptr, (int)data_len, 0);
3977     }
3978     else {
3979         Py_buffer view;
3980         PyObject *res;
3981         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3982             return NULL;
3983         if (view.len > INT_MAX) {
3984             PyBuffer_Release(&view);
3985             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3986             return NULL;
3987         }
3988         res = expat_parse(self, view.buf, (int)view.len, 0);
3989         PyBuffer_Release(&view);
3990         return res;
3991     }
3992 }
3993 
3994 /*[clinic input]
3995 _elementtree.XMLParser._parse_whole
3996 
3997     file: object
3998     /
3999 
4000 [clinic start generated code]*/
4001 
4002 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)4003 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
4004 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
4005 {
4006     /* (internal) parse the whole input, until end of stream */
4007     PyObject* reader;
4008     PyObject* buffer;
4009     PyObject* temp;
4010     PyObject* res;
4011 
4012     if (!_check_xmlparser(self)) {
4013         return NULL;
4014     }
4015     reader = PyObject_GetAttrString(file, "read");
4016     if (!reader)
4017         return NULL;
4018 
4019     /* read from open file object */
4020     for (;;) {
4021 
4022         buffer = PyObject_CallFunction(reader, "i", 64*1024);
4023 
4024         if (!buffer) {
4025             /* read failed (e.g. due to KeyboardInterrupt) */
4026             Py_DECREF(reader);
4027             return NULL;
4028         }
4029 
4030         if (PyUnicode_CheckExact(buffer)) {
4031             /* A unicode object is encoded into bytes using UTF-8 */
4032             if (PyUnicode_GET_LENGTH(buffer) == 0) {
4033                 Py_DECREF(buffer);
4034                 break;
4035             }
4036             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
4037             Py_DECREF(buffer);
4038             if (!temp) {
4039                 /* Propagate exception from PyUnicode_AsEncodedString */
4040                 Py_DECREF(reader);
4041                 return NULL;
4042             }
4043             buffer = temp;
4044         }
4045         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
4046             Py_DECREF(buffer);
4047             break;
4048         }
4049 
4050         if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
4051             Py_DECREF(buffer);
4052             Py_DECREF(reader);
4053             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4054             return NULL;
4055         }
4056         res = expat_parse(
4057             self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
4058             );
4059 
4060         Py_DECREF(buffer);
4061 
4062         if (!res) {
4063             Py_DECREF(reader);
4064             return NULL;
4065         }
4066         Py_DECREF(res);
4067 
4068     }
4069 
4070     Py_DECREF(reader);
4071 
4072     res = expat_parse(self, "", 0, 1);
4073 
4074     if (res && TreeBuilder_CheckExact(self->target)) {
4075         Py_DECREF(res);
4076         return treebuilder_done((TreeBuilderObject*) self->target);
4077     }
4078 
4079     return res;
4080 }
4081 
4082 /*[clinic input]
4083 _elementtree.XMLParser._setevents
4084 
4085     events_queue: object
4086     events_to_report: object = None
4087     /
4088 
4089 [clinic start generated code]*/
4090 
4091 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4092 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4093                                        PyObject *events_queue,
4094                                        PyObject *events_to_report)
4095 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4096 {
4097     /* activate element event reporting */
4098     Py_ssize_t i;
4099     TreeBuilderObject *target;
4100     PyObject *events_append, *events_seq;
4101 
4102     if (!_check_xmlparser(self)) {
4103         return NULL;
4104     }
4105     if (!TreeBuilder_CheckExact(self->target)) {
4106         PyErr_SetString(
4107             PyExc_TypeError,
4108             "event handling only supported for ElementTree.TreeBuilder "
4109             "targets"
4110             );
4111         return NULL;
4112     }
4113 
4114     target = (TreeBuilderObject*) self->target;
4115 
4116     events_append = PyObject_GetAttrString(events_queue, "append");
4117     if (events_append == NULL)
4118         return NULL;
4119     Py_XSETREF(target->events_append, events_append);
4120 
4121     /* clear out existing events */
4122     Py_CLEAR(target->start_event_obj);
4123     Py_CLEAR(target->end_event_obj);
4124     Py_CLEAR(target->start_ns_event_obj);
4125     Py_CLEAR(target->end_ns_event_obj);
4126     Py_CLEAR(target->comment_event_obj);
4127     Py_CLEAR(target->pi_event_obj);
4128 
4129     if (events_to_report == Py_None) {
4130         /* default is "end" only */
4131         target->end_event_obj = PyUnicode_FromString("end");
4132         Py_RETURN_NONE;
4133     }
4134 
4135     if (!(events_seq = PySequence_Fast(events_to_report,
4136                                        "events must be a sequence"))) {
4137         return NULL;
4138     }
4139 
4140     for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4141         PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4142         const char *event_name = NULL;
4143         if (PyUnicode_Check(event_name_obj)) {
4144             event_name = PyUnicode_AsUTF8(event_name_obj);
4145         } else if (PyBytes_Check(event_name_obj)) {
4146             event_name = PyBytes_AS_STRING(event_name_obj);
4147         }
4148         if (event_name == NULL) {
4149             Py_DECREF(events_seq);
4150             PyErr_Format(PyExc_ValueError, "invalid events sequence");
4151             return NULL;
4152         }
4153 
4154         Py_INCREF(event_name_obj);
4155         if (strcmp(event_name, "start") == 0) {
4156             Py_XSETREF(target->start_event_obj, event_name_obj);
4157         } else if (strcmp(event_name, "end") == 0) {
4158             Py_XSETREF(target->end_event_obj, event_name_obj);
4159         } else if (strcmp(event_name, "start-ns") == 0) {
4160             Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4161             EXPAT(SetNamespaceDeclHandler)(
4162                 self->parser,
4163                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4164                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4165                 );
4166         } else if (strcmp(event_name, "end-ns") == 0) {
4167             Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4168             EXPAT(SetNamespaceDeclHandler)(
4169                 self->parser,
4170                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4171                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4172                 );
4173         } else if (strcmp(event_name, "comment") == 0) {
4174             Py_XSETREF(target->comment_event_obj, event_name_obj);
4175             EXPAT(SetCommentHandler)(
4176                 self->parser,
4177                 (XML_CommentHandler) expat_comment_handler
4178                 );
4179         } else if (strcmp(event_name, "pi") == 0) {
4180             Py_XSETREF(target->pi_event_obj, event_name_obj);
4181             EXPAT(SetProcessingInstructionHandler)(
4182                 self->parser,
4183                 (XML_ProcessingInstructionHandler) expat_pi_handler
4184                 );
4185         } else {
4186             Py_DECREF(event_name_obj);
4187             Py_DECREF(events_seq);
4188             PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4189             return NULL;
4190         }
4191     }
4192 
4193     Py_DECREF(events_seq);
4194     Py_RETURN_NONE;
4195 }
4196 
4197 static PyMemberDef xmlparser_members[] = {
4198     {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4199     {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4200     {NULL}
4201 };
4202 
4203 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4204 xmlparser_version_getter(XMLParserObject *self, void *closure)
4205 {
4206     return PyUnicode_FromFormat(
4207         "Expat %d.%d.%d", XML_MAJOR_VERSION,
4208         XML_MINOR_VERSION, XML_MICRO_VERSION);
4209 }
4210 
4211 static PyGetSetDef xmlparser_getsetlist[] = {
4212     {"version", (getter)xmlparser_version_getter, NULL, NULL},
4213     {NULL},
4214 };
4215 
4216 #include "clinic/_elementtree.c.h"
4217 
4218 static PyMethodDef element_methods[] = {
4219 
4220     _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4221 
4222     _ELEMENTTREE_ELEMENT_GET_METHODDEF
4223     _ELEMENTTREE_ELEMENT_SET_METHODDEF
4224 
4225     _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4226     _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4227     _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4228 
4229     _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4230     _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4231     _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4232     _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4233 
4234     _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4235     _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4236     _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4237 
4238     _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
4239     _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4240 
4241     _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4242     _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4243 
4244     _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4245 
4246     _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4247     _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4248     _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4249     _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4250     _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4251 
4252     {NULL, NULL}
4253 };
4254 
4255 static PyMappingMethods element_as_mapping = {
4256     (lenfunc) element_length,
4257     (binaryfunc) element_subscr,
4258     (objobjargproc) element_ass_subscr,
4259 };
4260 
4261 static PyGetSetDef element_getsetlist[] = {
4262     {"tag",
4263         (getter)element_tag_getter,
4264         (setter)element_tag_setter,
4265         "A string identifying what kind of data this element represents"},
4266     {"text",
4267         (getter)element_text_getter,
4268         (setter)element_text_setter,
4269         "A string of text directly after the start tag, or None"},
4270     {"tail",
4271         (getter)element_tail_getter,
4272         (setter)element_tail_setter,
4273         "A string of text directly after the end tag, or None"},
4274     {"attrib",
4275         (getter)element_attrib_getter,
4276         (setter)element_attrib_setter,
4277         "A dictionary containing the element's attributes"},
4278     {NULL},
4279 };
4280 
4281 static PyTypeObject Element_Type = {
4282     PyVarObject_HEAD_INIT(NULL, 0)
4283     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4284     /* methods */
4285     (destructor)element_dealloc,                    /* tp_dealloc */
4286     0,                                              /* tp_vectorcall_offset */
4287     0,                                              /* tp_getattr */
4288     0,                                              /* tp_setattr */
4289     0,                                              /* tp_as_async */
4290     (reprfunc)element_repr,                         /* tp_repr */
4291     0,                                              /* tp_as_number */
4292     &element_as_sequence,                           /* tp_as_sequence */
4293     &element_as_mapping,                            /* tp_as_mapping */
4294     0,                                              /* tp_hash */
4295     0,                                              /* tp_call */
4296     0,                                              /* tp_str */
4297     PyObject_GenericGetAttr,                        /* tp_getattro */
4298     0,                                              /* tp_setattro */
4299     0,                                              /* tp_as_buffer */
4300     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4301                                                     /* tp_flags */
4302     0,                                              /* tp_doc */
4303     (traverseproc)element_gc_traverse,              /* tp_traverse */
4304     (inquiry)element_gc_clear,                      /* tp_clear */
4305     0,                                              /* tp_richcompare */
4306     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
4307     0,                                              /* tp_iter */
4308     0,                                              /* tp_iternext */
4309     element_methods,                                /* tp_methods */
4310     0,                                              /* tp_members */
4311     element_getsetlist,                             /* tp_getset */
4312     0,                                              /* tp_base */
4313     0,                                              /* tp_dict */
4314     0,                                              /* tp_descr_get */
4315     0,                                              /* tp_descr_set */
4316     0,                                              /* tp_dictoffset */
4317     (initproc)element_init,                         /* tp_init */
4318     PyType_GenericAlloc,                            /* tp_alloc */
4319     element_new,                                    /* tp_new */
4320     0,                                              /* tp_free */
4321 };
4322 
4323 static PyMethodDef treebuilder_methods[] = {
4324     _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4325     _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4326     _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4327     _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4328     _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4329     _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4330     {NULL, NULL}
4331 };
4332 
4333 static PyTypeObject TreeBuilder_Type = {
4334     PyVarObject_HEAD_INIT(NULL, 0)
4335     "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4336     /* methods */
4337     (destructor)treebuilder_dealloc,                /* tp_dealloc */
4338     0,                                              /* tp_vectorcall_offset */
4339     0,                                              /* tp_getattr */
4340     0,                                              /* tp_setattr */
4341     0,                                              /* tp_as_async */
4342     0,                                              /* tp_repr */
4343     0,                                              /* tp_as_number */
4344     0,                                              /* tp_as_sequence */
4345     0,                                              /* tp_as_mapping */
4346     0,                                              /* tp_hash */
4347     0,                                              /* tp_call */
4348     0,                                              /* tp_str */
4349     0,                                              /* tp_getattro */
4350     0,                                              /* tp_setattro */
4351     0,                                              /* tp_as_buffer */
4352     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4353                                                     /* tp_flags */
4354     0,                                              /* tp_doc */
4355     (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
4356     (inquiry)treebuilder_gc_clear,                  /* tp_clear */
4357     0,                                              /* tp_richcompare */
4358     0,                                              /* tp_weaklistoffset */
4359     0,                                              /* tp_iter */
4360     0,                                              /* tp_iternext */
4361     treebuilder_methods,                            /* tp_methods */
4362     0,                                              /* tp_members */
4363     0,                                              /* tp_getset */
4364     0,                                              /* tp_base */
4365     0,                                              /* tp_dict */
4366     0,                                              /* tp_descr_get */
4367     0,                                              /* tp_descr_set */
4368     0,                                              /* tp_dictoffset */
4369     _elementtree_TreeBuilder___init__,              /* tp_init */
4370     PyType_GenericAlloc,                            /* tp_alloc */
4371     treebuilder_new,                                /* tp_new */
4372     0,                                              /* tp_free */
4373 };
4374 
4375 static PyMethodDef xmlparser_methods[] = {
4376     _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4377     _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4378     _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4379     _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4380     {NULL, NULL}
4381 };
4382 
4383 static PyTypeObject XMLParser_Type = {
4384     PyVarObject_HEAD_INIT(NULL, 0)
4385     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4386     /* methods */
4387     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4388     0,                                              /* tp_vectorcall_offset */
4389     0,                                              /* tp_getattr */
4390     0,                                              /* tp_setattr */
4391     0,                                              /* tp_as_async */
4392     0,                                              /* tp_repr */
4393     0,                                              /* tp_as_number */
4394     0,                                              /* tp_as_sequence */
4395     0,                                              /* tp_as_mapping */
4396     0,                                              /* tp_hash */
4397     0,                                              /* tp_call */
4398     0,                                              /* tp_str */
4399     0,                                              /* tp_getattro */
4400     0,                                              /* tp_setattro */
4401     0,                                              /* tp_as_buffer */
4402     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4403                                                     /* tp_flags */
4404     0,                                              /* tp_doc */
4405     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4406     (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4407     0,                                              /* tp_richcompare */
4408     0,                                              /* tp_weaklistoffset */
4409     0,                                              /* tp_iter */
4410     0,                                              /* tp_iternext */
4411     xmlparser_methods,                              /* tp_methods */
4412     xmlparser_members,                              /* tp_members */
4413     xmlparser_getsetlist,                           /* tp_getset */
4414     0,                                              /* tp_base */
4415     0,                                              /* tp_dict */
4416     0,                                              /* tp_descr_get */
4417     0,                                              /* tp_descr_set */
4418     0,                                              /* tp_dictoffset */
4419     _elementtree_XMLParser___init__,                /* tp_init */
4420     PyType_GenericAlloc,                            /* tp_alloc */
4421     xmlparser_new,                                  /* tp_new */
4422     0,                                              /* tp_free */
4423 };
4424 
4425 /* ==================================================================== */
4426 /* python module interface */
4427 
4428 static PyMethodDef _functions[] = {
4429     {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
4430     _ELEMENTTREE__SET_FACTORIES_METHODDEF
4431     {NULL, NULL}
4432 };
4433 
4434 
4435 static struct PyModuleDef elementtreemodule = {
4436     PyModuleDef_HEAD_INIT,
4437     "_elementtree",
4438     NULL,
4439     sizeof(elementtreestate),
4440     _functions,
4441     NULL,
4442     elementtree_traverse,
4443     elementtree_clear,
4444     elementtree_free
4445 };
4446 
4447 PyMODINIT_FUNC
PyInit__elementtree(void)4448 PyInit__elementtree(void)
4449 {
4450     PyObject *m, *temp;
4451     elementtreestate *st;
4452 
4453     m = PyState_FindModule(&elementtreemodule);
4454     if (m) {
4455         Py_INCREF(m);
4456         return m;
4457     }
4458 
4459     /* Initialize object types */
4460     if (PyType_Ready(&ElementIter_Type) < 0)
4461         return NULL;
4462     if (PyType_Ready(&TreeBuilder_Type) < 0)
4463         return NULL;
4464     if (PyType_Ready(&Element_Type) < 0)
4465         return NULL;
4466     if (PyType_Ready(&XMLParser_Type) < 0)
4467         return NULL;
4468 
4469     m = PyModule_Create(&elementtreemodule);
4470     if (!m)
4471         return NULL;
4472     st = ET_STATE(m);
4473 
4474     if (!(temp = PyImport_ImportModule("copy")))
4475         return NULL;
4476     st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4477     Py_XDECREF(temp);
4478 
4479     if (st->deepcopy_obj == NULL) {
4480         return NULL;
4481     }
4482 
4483     assert(!PyErr_Occurred());
4484     if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4485         return NULL;
4486 
4487     /* link against pyexpat */
4488     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4489     if (expat_capi) {
4490         /* check that it's usable */
4491         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4492             (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4493             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4494             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4495             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4496             PyErr_SetString(PyExc_ImportError,
4497                             "pyexpat version is incompatible");
4498             return NULL;
4499         }
4500     } else {
4501         return NULL;
4502     }
4503 
4504     st->parseerror_obj = PyErr_NewException(
4505         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4506         );
4507     Py_INCREF(st->parseerror_obj);
4508     PyModule_AddObject(m, "ParseError", st->parseerror_obj);
4509 
4510     Py_INCREF((PyObject *)&Element_Type);
4511     PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4512 
4513     Py_INCREF((PyObject *)&TreeBuilder_Type);
4514     PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4515 
4516     Py_INCREF((PyObject *)&XMLParser_Type);
4517     PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
4518 
4519     return m;
4520 }
4521