1 /*--------------------------------------------------------------------
2  * Licensed to PSF under a Contributor Agreement.
3  * See https://www.python.org/psf/license for licensing details.
4  *
5  * _elementtree - C accelerator for xml.etree.ElementTree
6  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7  * Copyright (c) 1999-2009 by Fredrik Lundh.
8  *
9  * info@pythonware.com
10  * http://www.pythonware.com
11  *--------------------------------------------------------------------
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 
16 #include "Python.h"
17 #include "structmember.h"         // PyMemberDef
18 
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21 
/* Number of child slots stored inline in an element's extra block.  An
   element with at most this many children needs no separately
   allocated child array. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35 
36 /* -------------------------------------------------------------------- */
37 
/* Optional allocation tracing.  With the condition below switched to 1,
   ALLOC and RELEASE maintain a running byte total and print it together
   with the given comment; as configured, both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48 
/* compiler tweaks: LOCAL(type) starts the definition of a file-private
   helper returning `type`.  Under MSVC the helper is additionally
   declared __inline and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55 
/* The text and tail slots of an element hold tagged pointers: bit 0 of
   the stored value is a 'join' flag, the remaining bits are the object
   address.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for what the flag means.

     JOIN_OBJ(p)        p with the flag bit masked off, as a PyObject*
     JOIN_GET(p)        the flag bit of p (0 or 1)
     JOIN_SET(p, flag)  the object pointer of p with `flag` or'ed in */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
63 
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68     PyObject *tmp = JOIN_OBJ(*p);
69     *p = new_joined_ptr;
70     Py_DECREF(tmp);
71 }
72 
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74  * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78     if (*p) {
79         _set_joined_ptr(p, NULL);
80     }
81 }
82 
83 /* Types defined by this extension */
84 static PyTypeObject Element_Type;
85 static PyTypeObject ElementIter_Type;
86 static PyTypeObject TreeBuilder_Type;
87 static PyTypeObject XMLParser_Type;
88 
89 
90 /* Per-module state; PEP 3121 */
91 typedef struct {
92     PyObject *parseerror_obj;
93     PyObject *deepcopy_obj;
94     PyObject *elementpath_obj;
95     PyObject *comment_factory;
96     PyObject *pi_factory;
97 } elementtreestate;
98 
99 static struct PyModuleDef elementtreemodule;
100 
101 /* Given a module object (assumed to be _elementtree), get its per-module
102  * state.
103  */
104 static inline elementtreestate*
get_elementtree_state(PyObject * module)105 get_elementtree_state(PyObject *module)
106 {
107     void *state = PyModule_GetState(module);
108     assert(state != NULL);
109     return (elementtreestate *)state;
110 }
111 
/* Look up the module instance imported in the currently running
 * sub-interpreter and yield its state.  Neither the module lookup nor
 * the state pointer is checked for NULL here.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
117 
118 static int
elementtree_clear(PyObject * m)119 elementtree_clear(PyObject *m)
120 {
121     elementtreestate *st = get_elementtree_state(m);
122     Py_CLEAR(st->parseerror_obj);
123     Py_CLEAR(st->deepcopy_obj);
124     Py_CLEAR(st->elementpath_obj);
125     Py_CLEAR(st->comment_factory);
126     Py_CLEAR(st->pi_factory);
127     return 0;
128 }
129 
130 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)131 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132 {
133     elementtreestate *st = get_elementtree_state(m);
134     Py_VISIT(st->parseerror_obj);
135     Py_VISIT(st->deepcopy_obj);
136     Py_VISIT(st->elementpath_obj);
137     Py_VISIT(st->comment_factory);
138     Py_VISIT(st->pi_factory);
139     return 0;
140 }
141 
142 static void
elementtree_free(void * m)143 elementtree_free(void *m)
144 {
145     elementtree_clear((PyObject *)m);
146 }
147 
148 /* helpers */
149 
150 LOCAL(PyObject*)
list_join(PyObject * list)151 list_join(PyObject* list)
152 {
153     /* join list elements */
154     PyObject* joiner;
155     PyObject* result;
156 
157     joiner = PyUnicode_FromStringAndSize("", 0);
158     if (!joiner)
159         return NULL;
160     result = PyUnicode_Join(joiner, list);
161     Py_DECREF(joiner);
162     return result;
163 }
164 
165 /* Is the given object an empty dictionary?
166 */
167 static int
is_empty_dict(PyObject * obj)168 is_empty_dict(PyObject *obj)
169 {
170     return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
171 }
172 
173 
174 /* -------------------------------------------------------------------- */
175 /* the Element type */
176 
177 typedef struct {
178 
179     /* attributes (a dictionary object), or NULL if no attributes */
180     PyObject* attrib;
181 
182     /* child elements */
183     Py_ssize_t length; /* actual number of items */
184     Py_ssize_t allocated; /* allocated items */
185 
186     /* this either points to _children or to a malloced buffer */
187     PyObject* *children;
188 
189     PyObject* _children[STATIC_CHILDREN];
190 
191 } ElementObjectExtra;
192 
193 typedef struct {
194     PyObject_HEAD
195 
196     /* element tag (a string). */
197     PyObject* tag;
198 
199     /* text before first child.  note that this is a tagged pointer;
200        use JOIN_OBJ to get the object pointer.  the join flag is used
201        to distinguish lists created by the tree builder from lists
202        assigned to the attribute by application code; the former
203        should be joined before being returned to the user, the latter
204        should be left intact. */
205     PyObject* text;
206 
207     /* text after this element, in parent.  note that this is a tagged
208        pointer; use JOIN_OBJ to get the object pointer. */
209     PyObject* tail;
210 
211     ElementObjectExtra* extra;
212 
213     PyObject *weakreflist; /* For tp_weaklistoffset */
214 
215 } ElementObject;
216 
217 
/* Element_CheckExact: the type of op is exactly Element_Type.
   Element_Check: op is an instance of Element_Type or of a subtype. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
220 
221 
222 /* -------------------------------------------------------------------- */
223 /* Element constructors and destructor */
224 
225 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)226 create_extra(ElementObject* self, PyObject* attrib)
227 {
228     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
229     if (!self->extra) {
230         PyErr_NoMemory();
231         return -1;
232     }
233 
234     Py_XINCREF(attrib);
235     self->extra->attrib = attrib;
236 
237     self->extra->length = 0;
238     self->extra->allocated = STATIC_CHILDREN;
239     self->extra->children = self->extra->_children;
240 
241     return 0;
242 }
243 
244 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)245 dealloc_extra(ElementObjectExtra *extra)
246 {
247     Py_ssize_t i;
248 
249     if (!extra)
250         return;
251 
252     Py_XDECREF(extra->attrib);
253 
254     for (i = 0; i < extra->length; i++)
255         Py_DECREF(extra->children[i]);
256 
257     if (extra->children != extra->_children)
258         PyObject_Free(extra->children);
259 
260     PyObject_Free(extra);
261 }
262 
263 LOCAL(void)
clear_extra(ElementObject * self)264 clear_extra(ElementObject* self)
265 {
266     ElementObjectExtra *myextra;
267 
268     if (!self->extra)
269         return;
270 
271     /* Avoid DECREFs calling into this code again (cycles, etc.)
272     */
273     myextra = self->extra;
274     self->extra = NULL;
275 
276     dealloc_extra(myextra);
277 }
278 
279 /* Convenience internal function to create new Element objects with the given
280  * tag and attributes.
281 */
282 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)283 create_new_element(PyObject* tag, PyObject* attrib)
284 {
285     ElementObject* self;
286 
287     self = PyObject_GC_New(ElementObject, &Element_Type);
288     if (self == NULL)
289         return NULL;
290     self->extra = NULL;
291 
292     Py_INCREF(tag);
293     self->tag = tag;
294 
295     Py_INCREF(Py_None);
296     self->text = Py_None;
297 
298     Py_INCREF(Py_None);
299     self->tail = Py_None;
300 
301     self->weakreflist = NULL;
302 
303     ALLOC(sizeof(ElementObject), "create element");
304     PyObject_GC_Track(self);
305 
306     if (attrib != NULL && !is_empty_dict(attrib)) {
307         if (create_extra(self, attrib) < 0) {
308             Py_DECREF(self);
309             return NULL;
310         }
311     }
312 
313     return (PyObject*) self;
314 }
315 
316 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)317 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318 {
319     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320     if (e != NULL) {
321         Py_INCREF(Py_None);
322         e->tag = Py_None;
323 
324         Py_INCREF(Py_None);
325         e->text = Py_None;
326 
327         Py_INCREF(Py_None);
328         e->tail = Py_None;
329 
330         e->extra = NULL;
331         e->weakreflist = NULL;
332     }
333     return (PyObject *)e;
334 }
335 
336 /* Helper function for extracting the attrib dictionary from a keywords dict.
337  * This is required by some constructors/functions in this module that can
338  * either accept attrib as a keyword argument or all attributes splashed
339  * directly into *kwds.
340  *
341  * Return a dictionary with the content of kwds merged into the content of
342  * attrib. If there is no attrib keyword, return a copy of kwds.
343  */
344 static PyObject*
get_attrib_from_keywords(PyObject * kwds)345 get_attrib_from_keywords(PyObject *kwds)
346 {
347     PyObject *attrib_str = PyUnicode_FromString("attrib");
348     if (attrib_str == NULL) {
349         return NULL;
350     }
351     PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
352 
353     if (attrib) {
354         /* If attrib was found in kwds, copy its value and remove it from
355          * kwds
356          */
357         if (!PyDict_Check(attrib)) {
358             Py_DECREF(attrib_str);
359             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360                          Py_TYPE(attrib)->tp_name);
361             return NULL;
362         }
363         attrib = PyDict_Copy(attrib);
364         if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365             Py_DECREF(attrib);
366             attrib = NULL;
367         }
368     }
369     else if (!PyErr_Occurred()) {
370         attrib = PyDict_New();
371     }
372 
373     Py_DECREF(attrib_str);
374 
375     if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376         Py_DECREF(attrib);
377         return NULL;
378     }
379     return attrib;
380 }
381 
382 /*[clinic input]
383 module _elementtree
384 class _elementtree.Element "ElementObject *" "&Element_Type"
385 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387 [clinic start generated code]*/
388 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389 
390 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)391 element_init(PyObject *self, PyObject *args, PyObject *kwds)
392 {
393     PyObject *tag;
394     PyObject *attrib = NULL;
395     ElementObject *self_elem;
396 
397     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398         return -1;
399 
400     if (attrib) {
401         /* attrib passed as positional arg */
402         attrib = PyDict_Copy(attrib);
403         if (!attrib)
404             return -1;
405         if (kwds) {
406             if (PyDict_Update(attrib, kwds) < 0) {
407                 Py_DECREF(attrib);
408                 return -1;
409             }
410         }
411     } else if (kwds) {
412         /* have keywords args */
413         attrib = get_attrib_from_keywords(kwds);
414         if (!attrib)
415             return -1;
416     }
417 
418     self_elem = (ElementObject *)self;
419 
420     if (attrib != NULL && !is_empty_dict(attrib)) {
421         if (create_extra(self_elem, attrib) < 0) {
422             Py_DECREF(attrib);
423             return -1;
424         }
425     }
426 
427     /* We own a reference to attrib here and it's no longer needed. */
428     Py_XDECREF(attrib);
429 
430     /* Replace the objects already pointed to by tag, text and tail. */
431     Py_INCREF(tag);
432     Py_XSETREF(self_elem->tag, tag);
433 
434     Py_INCREF(Py_None);
435     _set_joined_ptr(&self_elem->text, Py_None);
436 
437     Py_INCREF(Py_None);
438     _set_joined_ptr(&self_elem->tail, Py_None);
439 
440     return 0;
441 }
442 
443 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)444 element_resize(ElementObject* self, Py_ssize_t extra)
445 {
446     Py_ssize_t size;
447     PyObject* *children;
448 
449     assert(extra >= 0);
450     /* make sure self->children can hold the given number of extra
451        elements.  set an exception and return -1 if allocation failed */
452 
453     if (!self->extra) {
454         if (create_extra(self, NULL) < 0)
455             return -1;
456     }
457 
458     size = self->extra->length + extra;  /* never overflows */
459 
460     if (size > self->extra->allocated) {
461         /* use Python 2.4's list growth strategy */
462         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
463         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
464          * which needs at least 4 bytes.
465          * Although it's a false alarm always assume at least one child to
466          * be safe.
467          */
468         size = size ? size : 1;
469         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470             goto nomemory;
471         if (self->extra->children != self->extra->_children) {
472             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
473              * "children", which needs at least 4 bytes. Although it's a
474              * false alarm always assume at least one child to be safe.
475              */
476             children = PyObject_Realloc(self->extra->children,
477                                         size * sizeof(PyObject*));
478             if (!children)
479                 goto nomemory;
480         } else {
481             children = PyObject_Malloc(size * sizeof(PyObject*));
482             if (!children)
483                 goto nomemory;
484             /* copy existing children from static area to malloc buffer */
485             memcpy(children, self->extra->children,
486                    self->extra->length * sizeof(PyObject*));
487         }
488         self->extra->children = children;
489         self->extra->allocated = size;
490     }
491 
492     return 0;
493 
494   nomemory:
495     PyErr_NoMemory();
496     return -1;
497 }
498 
499 LOCAL(void)
raise_type_error(PyObject * element)500 raise_type_error(PyObject *element)
501 {
502     PyErr_Format(PyExc_TypeError,
503                  "expected an Element, not \"%.200s\"",
504                  Py_TYPE(element)->tp_name);
505 }
506 
507 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)508 element_add_subelement(ElementObject* self, PyObject* element)
509 {
510     /* add a child element to a parent */
511 
512     if (!Element_Check(element)) {
513         raise_type_error(element);
514         return -1;
515     }
516 
517     if (element_resize(self, 1) < 0)
518         return -1;
519 
520     Py_INCREF(element);
521     self->extra->children[self->extra->length] = element;
522 
523     self->extra->length++;
524 
525     return 0;
526 }
527 
528 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)529 element_get_attrib(ElementObject* self)
530 {
531     /* return borrowed reference to attrib dictionary */
532     /* note: this function assumes that the extra section exists */
533 
534     PyObject* res = self->extra->attrib;
535 
536     if (!res) {
537         /* create missing dictionary */
538         res = self->extra->attrib = PyDict_New();
539     }
540 
541     return res;
542 }
543 
544 LOCAL(PyObject*)
element_get_text(ElementObject * self)545 element_get_text(ElementObject* self)
546 {
547     /* return borrowed reference to text attribute */
548 
549     PyObject *res = self->text;
550 
551     if (JOIN_GET(res)) {
552         res = JOIN_OBJ(res);
553         if (PyList_CheckExact(res)) {
554             PyObject *tmp = list_join(res);
555             if (!tmp)
556                 return NULL;
557             self->text = tmp;
558             Py_DECREF(res);
559             res = tmp;
560         }
561     }
562 
563     return res;
564 }
565 
566 LOCAL(PyObject*)
element_get_tail(ElementObject * self)567 element_get_tail(ElementObject* self)
568 {
569     /* return borrowed reference to text attribute */
570 
571     PyObject *res = self->tail;
572 
573     if (JOIN_GET(res)) {
574         res = JOIN_OBJ(res);
575         if (PyList_CheckExact(res)) {
576             PyObject *tmp = list_join(res);
577             if (!tmp)
578                 return NULL;
579             self->tail = tmp;
580             Py_DECREF(res);
581             res = tmp;
582         }
583     }
584 
585     return res;
586 }
587 
588 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)589 subelement(PyObject *self, PyObject *args, PyObject *kwds)
590 {
591     PyObject* elem;
592 
593     ElementObject* parent;
594     PyObject* tag;
595     PyObject* attrib = NULL;
596     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597                           &Element_Type, &parent, &tag,
598                           &PyDict_Type, &attrib)) {
599         return NULL;
600     }
601 
602     if (attrib) {
603         /* attrib passed as positional arg */
604         attrib = PyDict_Copy(attrib);
605         if (!attrib)
606             return NULL;
607         if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608             Py_DECREF(attrib);
609             return NULL;
610         }
611     } else if (kwds) {
612         /* have keyword args */
613         attrib = get_attrib_from_keywords(kwds);
614         if (!attrib)
615             return NULL;
616     } else {
617         /* no attrib arg, no kwds, so no attribute */
618     }
619 
620     elem = create_new_element(tag, attrib);
621     Py_XDECREF(attrib);
622     if (elem == NULL)
623         return NULL;
624 
625     if (element_add_subelement(parent, elem) < 0) {
626         Py_DECREF(elem);
627         return NULL;
628     }
629 
630     return elem;
631 }
632 
633 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)634 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635 {
636     Py_VISIT(self->tag);
637     Py_VISIT(JOIN_OBJ(self->text));
638     Py_VISIT(JOIN_OBJ(self->tail));
639 
640     if (self->extra) {
641         Py_ssize_t i;
642         Py_VISIT(self->extra->attrib);
643 
644         for (i = 0; i < self->extra->length; ++i)
645             Py_VISIT(self->extra->children[i]);
646     }
647     return 0;
648 }
649 
650 static int
element_gc_clear(ElementObject * self)651 element_gc_clear(ElementObject *self)
652 {
653     Py_CLEAR(self->tag);
654     _clear_joined_ptr(&self->text);
655     _clear_joined_ptr(&self->tail);
656 
657     /* After dropping all references from extra, it's no longer valid anyway,
658      * so fully deallocate it.
659     */
660     clear_extra(self);
661     return 0;
662 }
663 
664 static void
element_dealloc(ElementObject * self)665 element_dealloc(ElementObject* self)
666 {
667     /* bpo-31095: UnTrack is needed before calling any callbacks */
668     PyObject_GC_UnTrack(self);
669     Py_TRASHCAN_BEGIN(self, element_dealloc)
670 
671     if (self->weakreflist != NULL)
672         PyObject_ClearWeakRefs((PyObject *) self);
673 
674     /* element_gc_clear clears all references and deallocates extra
675     */
676     element_gc_clear(self);
677 
678     RELEASE(sizeof(ElementObject), "destroy element");
679     Py_TYPE(self)->tp_free((PyObject *)self);
680     Py_TRASHCAN_END
681 }
682 
683 /* -------------------------------------------------------------------- */
684 
685 /*[clinic input]
686 _elementtree.Element.append
687 
688     subelement: object(subclass_of='&Element_Type')
689     /
690 
691 [clinic start generated code]*/
692 
693 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)694 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696 {
697     if (element_add_subelement(self, subelement) < 0)
698         return NULL;
699 
700     Py_RETURN_NONE;
701 }
702 
703 /*[clinic input]
704 _elementtree.Element.clear
705 
706 [clinic start generated code]*/
707 
708 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)709 _elementtree_Element_clear_impl(ElementObject *self)
710 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711 {
712     clear_extra(self);
713 
714     Py_INCREF(Py_None);
715     _set_joined_ptr(&self->text, Py_None);
716 
717     Py_INCREF(Py_None);
718     _set_joined_ptr(&self->tail, Py_None);
719 
720     Py_RETURN_NONE;
721 }
722 
723 /*[clinic input]
724 _elementtree.Element.__copy__
725 
726 [clinic start generated code]*/
727 
728 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)729 _elementtree_Element___copy___impl(ElementObject *self)
730 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
731 {
732     Py_ssize_t i;
733     ElementObject* element;
734 
735     element = (ElementObject*) create_new_element(
736         self->tag, self->extra ? self->extra->attrib : NULL);
737     if (!element)
738         return NULL;
739 
740     Py_INCREF(JOIN_OBJ(self->text));
741     _set_joined_ptr(&element->text, self->text);
742 
743     Py_INCREF(JOIN_OBJ(self->tail));
744     _set_joined_ptr(&element->tail, self->tail);
745 
746     assert(!element->extra || !element->extra->length);
747     if (self->extra) {
748         if (element_resize(element, self->extra->length) < 0) {
749             Py_DECREF(element);
750             return NULL;
751         }
752 
753         for (i = 0; i < self->extra->length; i++) {
754             Py_INCREF(self->extra->children[i]);
755             element->extra->children[i] = self->extra->children[i];
756         }
757 
758         assert(!element->extra->length);
759         element->extra->length = self->extra->length;
760     }
761 
762     return (PyObject*) element;
763 }
764 
765 /* Helper for a deep copy. */
766 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767 
768 /*[clinic input]
769 _elementtree.Element.__deepcopy__
770 
771     memo: object(subclass_of="&PyDict_Type")
772     /
773 
774 [clinic start generated code]*/
775 
776 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)777 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
779 {
780     Py_ssize_t i;
781     ElementObject* element;
782     PyObject* tag;
783     PyObject* attrib;
784     PyObject* text;
785     PyObject* tail;
786     PyObject* id;
787 
788     tag = deepcopy(self->tag, memo);
789     if (!tag)
790         return NULL;
791 
792     if (self->extra && self->extra->attrib) {
793         attrib = deepcopy(self->extra->attrib, memo);
794         if (!attrib) {
795             Py_DECREF(tag);
796             return NULL;
797         }
798     } else {
799         attrib = NULL;
800     }
801 
802     element = (ElementObject*) create_new_element(tag, attrib);
803 
804     Py_DECREF(tag);
805     Py_XDECREF(attrib);
806 
807     if (!element)
808         return NULL;
809 
810     text = deepcopy(JOIN_OBJ(self->text), memo);
811     if (!text)
812         goto error;
813     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
814 
815     tail = deepcopy(JOIN_OBJ(self->tail), memo);
816     if (!tail)
817         goto error;
818     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
819 
820     assert(!element->extra || !element->extra->length);
821     if (self->extra) {
822         if (element_resize(element, self->extra->length) < 0)
823             goto error;
824 
825         for (i = 0; i < self->extra->length; i++) {
826             PyObject* child = deepcopy(self->extra->children[i], memo);
827             if (!child || !Element_Check(child)) {
828                 if (child) {
829                     raise_type_error(child);
830                     Py_DECREF(child);
831                 }
832                 element->extra->length = i;
833                 goto error;
834             }
835             element->extra->children[i] = child;
836         }
837 
838         assert(!element->extra->length);
839         element->extra->length = self->extra->length;
840     }
841 
842     /* add object to memo dictionary (so deepcopy won't visit it again) */
843     id = PyLong_FromSsize_t((uintptr_t) self);
844     if (!id)
845         goto error;
846 
847     i = PyDict_SetItem(memo, id, (PyObject*) element);
848 
849     Py_DECREF(id);
850 
851     if (i < 0)
852         goto error;
853 
854     return (PyObject*) element;
855 
856   error:
857     Py_DECREF(element);
858     return NULL;
859 }
860 
861 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)862 deepcopy(PyObject *object, PyObject *memo)
863 {
864     /* do a deep copy of the given object */
865     elementtreestate *st;
866     PyObject *stack[2];
867 
868     /* Fast paths */
869     if (object == Py_None || PyUnicode_CheckExact(object)) {
870         Py_INCREF(object);
871         return object;
872     }
873 
874     if (Py_REFCNT(object) == 1) {
875         if (PyDict_CheckExact(object)) {
876             PyObject *key, *value;
877             Py_ssize_t pos = 0;
878             int simple = 1;
879             while (PyDict_Next(object, &pos, &key, &value)) {
880                 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881                     simple = 0;
882                     break;
883                 }
884             }
885             if (simple)
886                 return PyDict_Copy(object);
887             /* Fall through to general case */
888         }
889         else if (Element_CheckExact(object)) {
890             return _elementtree_Element___deepcopy___impl(
891                 (ElementObject *)object, memo);
892         }
893     }
894 
895     /* General case */
896     st = ET_STATE_GLOBAL;
897     if (!st->deepcopy_obj) {
898         PyErr_SetString(PyExc_RuntimeError,
899                         "deepcopy helper not found");
900         return NULL;
901     }
902 
903     stack[0] = object;
904     stack[1] = memo;
905     return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
906 }
907 
908 
909 /*[clinic input]
910 _elementtree.Element.__sizeof__ -> Py_ssize_t
911 
912 [clinic start generated code]*/
913 
914 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)915 _elementtree_Element___sizeof___impl(ElementObject *self)
916 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
917 {
918     Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
919     if (self->extra) {
920         result += sizeof(ElementObjectExtra);
921         if (self->extra->children != self->extra->_children)
922             result += sizeof(PyObject*) * self->extra->allocated;
923     }
924     return result;
925 }
926 
/* Keys of the state dictionary exchanged by __getstate__/__setstate__. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
933 
934 /* __getstate__ returns a fabricated instance dict as in the pure-Python
935  * Element implementation, for interoperability/interchangeability.  This
936  * makes the pure-Python implementation details an API, but (a) there aren't
937  * any unnecessary structures there; and (b) it buys compatibility with 3.2
938  * pickles.  See issue #16076.
939  */
940 /*[clinic input]
941 _elementtree.Element.__getstate__
942 
943 [clinic start generated code]*/
944 
945 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)946 _elementtree_Element___getstate___impl(ElementObject *self)
947 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
948 {
949     Py_ssize_t i;
950     PyObject *children, *attrib;
951 
952     /* Build a list of children. */
953     children = PyList_New(self->extra ? self->extra->length : 0);
954     if (!children)
955         return NULL;
956     for (i = 0; i < PyList_GET_SIZE(children); i++) {
957         PyObject *child = self->extra->children[i];
958         Py_INCREF(child);
959         PyList_SET_ITEM(children, i, child);
960     }
961 
962     if (self->extra && self->extra->attrib) {
963         attrib = self->extra->attrib;
964         Py_INCREF(attrib);
965     }
966     else {
967         attrib = PyDict_New();
968         if (!attrib) {
969             Py_DECREF(children);
970             return NULL;
971         }
972     }
973 
974     return Py_BuildValue("{sOsNsNsOsO}",
975                          PICKLED_TAG, self->tag,
976                          PICKLED_CHILDREN, children,
977                          PICKLED_ATTRIB, attrib,
978                          PICKLED_TEXT, JOIN_OBJ(self->text),
979                          PICKLED_TAIL, JOIN_OBJ(self->tail));
980 }
981 
982 static PyObject *
element_setstate_from_attributes(ElementObject * self,PyObject * tag,PyObject * attrib,PyObject * text,PyObject * tail,PyObject * children)983 element_setstate_from_attributes(ElementObject *self,
984                                  PyObject *tag,
985                                  PyObject *attrib,
986                                  PyObject *text,
987                                  PyObject *tail,
988                                  PyObject *children)
989 {
990     Py_ssize_t i, nchildren;
991     ElementObjectExtra *oldextra = NULL;
992 
993     if (!tag) {
994         PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995         return NULL;
996     }
997 
998     Py_INCREF(tag);
999     Py_XSETREF(self->tag, tag);
1000 
1001     text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002     Py_INCREF(JOIN_OBJ(text));
1003     _set_joined_ptr(&self->text, text);
1004 
1005     tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006     Py_INCREF(JOIN_OBJ(tail));
1007     _set_joined_ptr(&self->tail, tail);
1008 
1009     /* Handle ATTRIB and CHILDREN. */
1010     if (!children && !attrib) {
1011         Py_RETURN_NONE;
1012     }
1013 
1014     /* Compute 'nchildren'. */
1015     if (children) {
1016         if (!PyList_Check(children)) {
1017             PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018             return NULL;
1019         }
1020         nchildren = PyList_GET_SIZE(children);
1021 
1022         /* (Re-)allocate 'extra'.
1023            Avoid DECREFs calling into this code again (cycles, etc.)
1024          */
1025         oldextra = self->extra;
1026         self->extra = NULL;
1027         if (element_resize(self, nchildren)) {
1028             assert(!self->extra || !self->extra->length);
1029             clear_extra(self);
1030             self->extra = oldextra;
1031             return NULL;
1032         }
1033         assert(self->extra);
1034         assert(self->extra->allocated >= nchildren);
1035         if (oldextra) {
1036             assert(self->extra->attrib == NULL);
1037             self->extra->attrib = oldextra->attrib;
1038             oldextra->attrib = NULL;
1039         }
1040 
1041         /* Copy children */
1042         for (i = 0; i < nchildren; i++) {
1043             PyObject *child = PyList_GET_ITEM(children, i);
1044             if (!Element_Check(child)) {
1045                 raise_type_error(child);
1046                 self->extra->length = i;
1047                 dealloc_extra(oldextra);
1048                 return NULL;
1049             }
1050             Py_INCREF(child);
1051             self->extra->children[i] = child;
1052         }
1053 
1054         assert(!self->extra->length);
1055         self->extra->length = nchildren;
1056     }
1057     else {
1058         if (element_resize(self, 0)) {
1059             return NULL;
1060         }
1061     }
1062 
1063     /* Stash attrib. */
1064     Py_XINCREF(attrib);
1065     Py_XSETREF(self->extra->attrib, attrib);
1066     dealloc_extra(oldextra);
1067 
1068     Py_RETURN_NONE;
1069 }
1070 
1071 /* __setstate__ for Element instance from the Python implementation.
1072  * 'state' should be the instance dict.
1073  */
1074 
1075 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1076 element_setstate_from_Python(ElementObject *self, PyObject *state)
1077 {
1078     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079                              PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080     PyObject *args;
1081     PyObject *tag, *attrib, *text, *tail, *children;
1082     PyObject *retval;
1083 
1084     tag = attrib = text = tail = children = NULL;
1085     args = PyTuple_New(0);
1086     if (!args)
1087         return NULL;
1088 
1089     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090                                     &attrib, &text, &tail, &children))
1091         retval = element_setstate_from_attributes(self, tag, attrib, text,
1092                                                   tail, children);
1093     else
1094         retval = NULL;
1095 
1096     Py_DECREF(args);
1097     return retval;
1098 }
1099 
1100 /*[clinic input]
1101 _elementtree.Element.__setstate__
1102 
1103     state: object
1104     /
1105 
1106 [clinic start generated code]*/
1107 
1108 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1109 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1111 {
1112     if (!PyDict_CheckExact(state)) {
1113         PyErr_Format(PyExc_TypeError,
1114                      "Don't know how to unpickle \"%.200R\" as an Element",
1115                      state);
1116         return NULL;
1117     }
1118     else
1119         return element_setstate_from_Python(self, state);
1120 }
1121 
1122 LOCAL(int)
checkpath(PyObject * tag)1123 checkpath(PyObject* tag)
1124 {
1125     Py_ssize_t i;
1126     int check = 1;
1127 
1128     /* check if a tag contains an xpath character */
1129 
1130 #define PATHCHAR(ch) \
1131     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1132 
1133     if (PyUnicode_Check(tag)) {
1134         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1135         const void *data = PyUnicode_DATA(tag);
1136         unsigned int kind = PyUnicode_KIND(tag);
1137         if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138                 PyUnicode_READ(kind, data, 1) == '}' || (
1139                 PyUnicode_READ(kind, data, 1) == '*' &&
1140                 PyUnicode_READ(kind, data, 2) == '}'))) {
1141             /* wildcard: '{}tag' or '{*}tag' */
1142             return 1;
1143         }
1144         for (i = 0; i < len; i++) {
1145             Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146             if (ch == '{')
1147                 check = 0;
1148             else if (ch == '}')
1149                 check = 1;
1150             else if (check && PATHCHAR(ch))
1151                 return 1;
1152         }
1153         return 0;
1154     }
1155     if (PyBytes_Check(tag)) {
1156         const char *p = PyBytes_AS_STRING(tag);
1157         const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158         if (len >= 3 && p[0] == '{' && (
1159                 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1160             /* wildcard: '{}tag' or '{*}tag' */
1161             return 1;
1162         }
1163         for (i = 0; i < len; i++) {
1164             if (p[i] == '{')
1165                 check = 0;
1166             else if (p[i] == '}')
1167                 check = 1;
1168             else if (check && PATHCHAR(p[i]))
1169                 return 1;
1170         }
1171         return 0;
1172     }
1173 
1174     return 1; /* unknown type; might be path expression */
1175 }
1176 
1177 /*[clinic input]
1178 _elementtree.Element.extend
1179 
1180     elements: object
1181     /
1182 
1183 [clinic start generated code]*/
1184 
1185 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1186 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1188 {
1189     PyObject* seq;
1190     Py_ssize_t i;
1191 
1192     seq = PySequence_Fast(elements, "");
1193     if (!seq) {
1194         PyErr_Format(
1195             PyExc_TypeError,
1196             "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1197             );
1198         return NULL;
1199     }
1200 
1201     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1202         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1203         Py_INCREF(element);
1204         if (element_add_subelement(self, element) < 0) {
1205             Py_DECREF(seq);
1206             Py_DECREF(element);
1207             return NULL;
1208         }
1209         Py_DECREF(element);
1210     }
1211 
1212     Py_DECREF(seq);
1213 
1214     Py_RETURN_NONE;
1215 }
1216 
1217 /*[clinic input]
1218 _elementtree.Element.find
1219 
1220     path: object
1221     namespaces: object = None
1222 
1223 [clinic start generated code]*/
1224 
1225 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1226 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227                                PyObject *namespaces)
1228 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1229 {
1230     Py_ssize_t i;
1231     elementtreestate *st = ET_STATE_GLOBAL;
1232 
1233     if (checkpath(path) || namespaces != Py_None) {
1234         _Py_IDENTIFIER(find);
1235         return _PyObject_CallMethodIdObjArgs(
1236             st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1237             );
1238     }
1239 
1240     if (!self->extra)
1241         Py_RETURN_NONE;
1242 
1243     for (i = 0; i < self->extra->length; i++) {
1244         PyObject* item = self->extra->children[i];
1245         int rc;
1246         assert(Element_Check(item));
1247         Py_INCREF(item);
1248         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1249         if (rc > 0)
1250             return item;
1251         Py_DECREF(item);
1252         if (rc < 0)
1253             return NULL;
1254     }
1255 
1256     Py_RETURN_NONE;
1257 }
1258 
1259 /*[clinic input]
1260 _elementtree.Element.findtext
1261 
1262     path: object
1263     default: object = None
1264     namespaces: object = None
1265 
1266 [clinic start generated code]*/
1267 
1268 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1269 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270                                    PyObject *default_value,
1271                                    PyObject *namespaces)
1272 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1273 {
1274     Py_ssize_t i;
1275     _Py_IDENTIFIER(findtext);
1276     elementtreestate *st = ET_STATE_GLOBAL;
1277 
1278     if (checkpath(path) || namespaces != Py_None)
1279         return _PyObject_CallMethodIdObjArgs(
1280             st->elementpath_obj, &PyId_findtext,
1281             self, path, default_value, namespaces, NULL
1282             );
1283 
1284     if (!self->extra) {
1285         Py_INCREF(default_value);
1286         return default_value;
1287     }
1288 
1289     for (i = 0; i < self->extra->length; i++) {
1290         PyObject *item = self->extra->children[i];
1291         int rc;
1292         assert(Element_Check(item));
1293         Py_INCREF(item);
1294         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1295         if (rc > 0) {
1296             PyObject* text = element_get_text((ElementObject*)item);
1297             if (text == Py_None) {
1298                 Py_DECREF(item);
1299                 return PyUnicode_New(0, 0);
1300             }
1301             Py_XINCREF(text);
1302             Py_DECREF(item);
1303             return text;
1304         }
1305         Py_DECREF(item);
1306         if (rc < 0)
1307             return NULL;
1308     }
1309 
1310     Py_INCREF(default_value);
1311     return default_value;
1312 }
1313 
1314 /*[clinic input]
1315 _elementtree.Element.findall
1316 
1317     path: object
1318     namespaces: object = None
1319 
1320 [clinic start generated code]*/
1321 
1322 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1323 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324                                   PyObject *namespaces)
1325 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1326 {
1327     Py_ssize_t i;
1328     PyObject* out;
1329     elementtreestate *st = ET_STATE_GLOBAL;
1330 
1331     if (checkpath(path) || namespaces != Py_None) {
1332         _Py_IDENTIFIER(findall);
1333         return _PyObject_CallMethodIdObjArgs(
1334             st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1335             );
1336     }
1337 
1338     out = PyList_New(0);
1339     if (!out)
1340         return NULL;
1341 
1342     if (!self->extra)
1343         return out;
1344 
1345     for (i = 0; i < self->extra->length; i++) {
1346         PyObject* item = self->extra->children[i];
1347         int rc;
1348         assert(Element_Check(item));
1349         Py_INCREF(item);
1350         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1351         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352             Py_DECREF(item);
1353             Py_DECREF(out);
1354             return NULL;
1355         }
1356         Py_DECREF(item);
1357     }
1358 
1359     return out;
1360 }
1361 
1362 /*[clinic input]
1363 _elementtree.Element.iterfind
1364 
1365     path: object
1366     namespaces: object = None
1367 
1368 [clinic start generated code]*/
1369 
1370 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1371 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372                                    PyObject *namespaces)
1373 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374 {
1375     PyObject* tag = path;
1376     _Py_IDENTIFIER(iterfind);
1377     elementtreestate *st = ET_STATE_GLOBAL;
1378 
1379     return _PyObject_CallMethodIdObjArgs(
1380         st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1381 }
1382 
1383 /*[clinic input]
1384 _elementtree.Element.get
1385 
1386     key: object
1387     default: object = None
1388 
1389 [clinic start generated code]*/
1390 
1391 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1392 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393                               PyObject *default_value)
1394 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1395 {
1396     PyObject* value;
1397 
1398     if (!self->extra || !self->extra->attrib)
1399         value = default_value;
1400     else {
1401         value = PyDict_GetItemWithError(self->extra->attrib, key);
1402         if (!value) {
1403             if (PyErr_Occurred()) {
1404                 return NULL;
1405             }
1406             value = default_value;
1407         }
1408     }
1409 
1410     Py_INCREF(value);
1411     return value;
1412 }
1413 
/* Forward declaration: create_elementiter() is defined elsewhere in this
 * file but is already needed by Element.iter() and Element.itertext()
 * below, which pass gettext=0 and gettext=1 respectively. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1416 
1417 
1418 /*[clinic input]
1419 _elementtree.Element.iter
1420 
1421     tag: object = None
1422 
1423 [clinic start generated code]*/
1424 
1425 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1426 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1427 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1428 {
1429     if (PyUnicode_Check(tag)) {
1430         if (PyUnicode_READY(tag) < 0)
1431             return NULL;
1432         if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1433             tag = Py_None;
1434     }
1435     else if (PyBytes_Check(tag)) {
1436         if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1437             tag = Py_None;
1438     }
1439 
1440     return create_elementiter(self, tag, 0);
1441 }
1442 
1443 
1444 /*[clinic input]
1445 _elementtree.Element.itertext
1446 
1447 [clinic start generated code]*/
1448 
1449 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1450 _elementtree_Element_itertext_impl(ElementObject *self)
1451 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1452 {
1453     return create_elementiter(self, Py_None, 1);
1454 }
1455 
1456 
1457 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1458 element_getitem(PyObject* self_, Py_ssize_t index)
1459 {
1460     ElementObject* self = (ElementObject*) self_;
1461 
1462     if (!self->extra || index < 0 || index >= self->extra->length) {
1463         PyErr_SetString(
1464             PyExc_IndexError,
1465             "child index out of range"
1466             );
1467         return NULL;
1468     }
1469 
1470     Py_INCREF(self->extra->children[index]);
1471     return self->extra->children[index];
1472 }
1473 
1474 /*[clinic input]
1475 _elementtree.Element.insert
1476 
1477     index: Py_ssize_t
1478     subelement: object(subclass_of='&Element_Type')
1479     /
1480 
1481 [clinic start generated code]*/
1482 
1483 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1484 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1485                                  PyObject *subelement)
1486 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1487 {
1488     Py_ssize_t i;
1489 
1490     if (!self->extra) {
1491         if (create_extra(self, NULL) < 0)
1492             return NULL;
1493     }
1494 
1495     if (index < 0) {
1496         index += self->extra->length;
1497         if (index < 0)
1498             index = 0;
1499     }
1500     if (index > self->extra->length)
1501         index = self->extra->length;
1502 
1503     if (element_resize(self, 1) < 0)
1504         return NULL;
1505 
1506     for (i = self->extra->length; i > index; i--)
1507         self->extra->children[i] = self->extra->children[i-1];
1508 
1509     Py_INCREF(subelement);
1510     self->extra->children[index] = subelement;
1511 
1512     self->extra->length++;
1513 
1514     Py_RETURN_NONE;
1515 }
1516 
1517 /*[clinic input]
1518 _elementtree.Element.items
1519 
1520 [clinic start generated code]*/
1521 
1522 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1523 _elementtree_Element_items_impl(ElementObject *self)
1524 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1525 {
1526     if (!self->extra || !self->extra->attrib)
1527         return PyList_New(0);
1528 
1529     return PyDict_Items(self->extra->attrib);
1530 }
1531 
1532 /*[clinic input]
1533 _elementtree.Element.keys
1534 
1535 [clinic start generated code]*/
1536 
1537 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1538 _elementtree_Element_keys_impl(ElementObject *self)
1539 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1540 {
1541     if (!self->extra || !self->extra->attrib)
1542         return PyList_New(0);
1543 
1544     return PyDict_Keys(self->extra->attrib);
1545 }
1546 
1547 static Py_ssize_t
element_length(ElementObject * self)1548 element_length(ElementObject* self)
1549 {
1550     if (!self->extra)
1551         return 0;
1552 
1553     return self->extra->length;
1554 }
1555 
1556 /*[clinic input]
1557 _elementtree.Element.makeelement
1558 
1559     tag: object
1560     attrib: object(subclass_of='&PyDict_Type')
1561     /
1562 
1563 [clinic start generated code]*/
1564 
1565 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1566 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1567                                       PyObject *attrib)
1568 /*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1569 {
1570     PyObject* elem;
1571 
1572     attrib = PyDict_Copy(attrib);
1573     if (!attrib)
1574         return NULL;
1575 
1576     elem = create_new_element(tag, attrib);
1577 
1578     Py_DECREF(attrib);
1579 
1580     return elem;
1581 }
1582 
1583 /*[clinic input]
1584 _elementtree.Element.remove
1585 
1586     subelement: object(subclass_of='&Element_Type')
1587     /
1588 
1589 [clinic start generated code]*/
1590 
1591 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1592 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1593 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1594 {
1595     Py_ssize_t i;
1596     int rc;
1597     PyObject *found;
1598 
1599     if (!self->extra) {
1600         /* element has no children, so raise exception */
1601         PyErr_SetString(
1602             PyExc_ValueError,
1603             "list.remove(x): x not in list"
1604             );
1605         return NULL;
1606     }
1607 
1608     for (i = 0; i < self->extra->length; i++) {
1609         if (self->extra->children[i] == subelement)
1610             break;
1611         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1612         if (rc > 0)
1613             break;
1614         if (rc < 0)
1615             return NULL;
1616     }
1617 
1618     if (i >= self->extra->length) {
1619         /* subelement is not in children, so raise exception */
1620         PyErr_SetString(
1621             PyExc_ValueError,
1622             "list.remove(x): x not in list"
1623             );
1624         return NULL;
1625     }
1626 
1627     found = self->extra->children[i];
1628 
1629     self->extra->length--;
1630     for (; i < self->extra->length; i++)
1631         self->extra->children[i] = self->extra->children[i+1];
1632 
1633     Py_DECREF(found);
1634     Py_RETURN_NONE;
1635 }
1636 
1637 static PyObject*
element_repr(ElementObject * self)1638 element_repr(ElementObject* self)
1639 {
1640     int status;
1641 
1642     if (self->tag == NULL)
1643         return PyUnicode_FromFormat("<Element at %p>", self);
1644 
1645     status = Py_ReprEnter((PyObject *)self);
1646     if (status == 0) {
1647         PyObject *res;
1648         res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1649         Py_ReprLeave((PyObject *)self);
1650         return res;
1651     }
1652     if (status > 0)
1653         PyErr_Format(PyExc_RuntimeError,
1654                      "reentrant call inside %s.__repr__",
1655                      Py_TYPE(self)->tp_name);
1656     return NULL;
1657 }
1658 
1659 /*[clinic input]
1660 _elementtree.Element.set
1661 
1662     key: object
1663     value: object
1664     /
1665 
1666 [clinic start generated code]*/
1667 
1668 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1669 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1670                               PyObject *value)
1671 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1672 {
1673     PyObject* attrib;
1674 
1675     if (!self->extra) {
1676         if (create_extra(self, NULL) < 0)
1677             return NULL;
1678     }
1679 
1680     attrib = element_get_attrib(self);
1681     if (!attrib)
1682         return NULL;
1683 
1684     if (PyDict_SetItem(attrib, key, value) < 0)
1685         return NULL;
1686 
1687     Py_RETURN_NONE;
1688 }
1689 
1690 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1691 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1692 {
1693     ElementObject* self = (ElementObject*) self_;
1694     Py_ssize_t i;
1695     PyObject* old;
1696 
1697     if (!self->extra || index < 0 || index >= self->extra->length) {
1698         PyErr_SetString(
1699             PyExc_IndexError,
1700             "child assignment index out of range");
1701         return -1;
1702     }
1703 
1704     old = self->extra->children[index];
1705 
1706     if (item) {
1707         if (!Element_Check(item)) {
1708             raise_type_error(item);
1709             return -1;
1710         }
1711         Py_INCREF(item);
1712         self->extra->children[index] = item;
1713     } else {
1714         self->extra->length--;
1715         for (i = index; i < self->extra->length; i++)
1716             self->extra->children[i] = self->extra->children[i+1];
1717     }
1718 
1719     Py_DECREF(old);
1720 
1721     return 0;
1722 }
1723 
1724 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1725 element_subscr(PyObject* self_, PyObject* item)
1726 {
1727     ElementObject* self = (ElementObject*) self_;
1728 
1729     if (PyIndex_Check(item)) {
1730         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1731 
1732         if (i == -1 && PyErr_Occurred()) {
1733             return NULL;
1734         }
1735         if (i < 0 && self->extra)
1736             i += self->extra->length;
1737         return element_getitem(self_, i);
1738     }
1739     else if (PySlice_Check(item)) {
1740         Py_ssize_t start, stop, step, slicelen, i;
1741         size_t cur;
1742         PyObject* list;
1743 
1744         if (!self->extra)
1745             return PyList_New(0);
1746 
1747         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1748             return NULL;
1749         }
1750         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1751                                          step);
1752 
1753         if (slicelen <= 0)
1754             return PyList_New(0);
1755         else {
1756             list = PyList_New(slicelen);
1757             if (!list)
1758                 return NULL;
1759 
1760             for (cur = start, i = 0; i < slicelen;
1761                  cur += step, i++) {
1762                 PyObject* item = self->extra->children[cur];
1763                 Py_INCREF(item);
1764                 PyList_SET_ITEM(list, i, item);
1765             }
1766 
1767             return list;
1768         }
1769     }
1770     else {
1771         PyErr_SetString(PyExc_TypeError,
1772                 "element indices must be integers");
1773         return NULL;
1774     }
1775 }
1776 
1777 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1778 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1779 {
1780     ElementObject* self = (ElementObject*) self_;
1781 
1782     if (PyIndex_Check(item)) {
1783         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1784 
1785         if (i == -1 && PyErr_Occurred()) {
1786             return -1;
1787         }
1788         if (i < 0 && self->extra)
1789             i += self->extra->length;
1790         return element_setitem(self_, i, value);
1791     }
1792     else if (PySlice_Check(item)) {
1793         Py_ssize_t start, stop, step, slicelen, newlen, i;
1794         size_t cur;
1795 
1796         PyObject* recycle = NULL;
1797         PyObject* seq;
1798 
1799         if (!self->extra) {
1800             if (create_extra(self, NULL) < 0)
1801                 return -1;
1802         }
1803 
1804         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1805             return -1;
1806         }
1807         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1808                                          step);
1809 
1810         if (value == NULL) {
1811             /* Delete slice */
1812             size_t cur;
1813             Py_ssize_t i;
1814 
1815             if (slicelen <= 0)
1816                 return 0;
1817 
1818             /* Since we're deleting, the direction of the range doesn't matter,
1819              * so for simplicity make it always ascending.
1820             */
1821             if (step < 0) {
1822                 stop = start + 1;
1823                 start = stop + step * (slicelen - 1) - 1;
1824                 step = -step;
1825             }
1826 
1827             assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1828 
1829             /* recycle is a list that will contain all the children
1830              * scheduled for removal.
1831             */
1832             if (!(recycle = PyList_New(slicelen))) {
1833                 return -1;
1834             }
1835 
1836             /* This loop walks over all the children that have to be deleted,
1837              * with cur pointing at them. num_moved is the amount of children
1838              * until the next deleted child that have to be "shifted down" to
1839              * occupy the deleted's places.
1840              * Note that in the ith iteration, shifting is done i+i places down
1841              * because i children were already removed.
1842             */
1843             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1844                 /* Compute how many children have to be moved, clipping at the
1845                  * list end.
1846                 */
1847                 Py_ssize_t num_moved = step - 1;
1848                 if (cur + step >= (size_t)self->extra->length) {
1849                     num_moved = self->extra->length - cur - 1;
1850                 }
1851 
1852                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1853 
1854                 memmove(
1855                     self->extra->children + cur - i,
1856                     self->extra->children + cur + 1,
1857                     num_moved * sizeof(PyObject *));
1858             }
1859 
1860             /* Leftover "tail" after the last removed child */
1861             cur = start + (size_t)slicelen * step;
1862             if (cur < (size_t)self->extra->length) {
1863                 memmove(
1864                     self->extra->children + cur - slicelen,
1865                     self->extra->children + cur,
1866                     (self->extra->length - cur) * sizeof(PyObject *));
1867             }
1868 
1869             self->extra->length -= slicelen;
1870 
1871             /* Discard the recycle list with all the deleted sub-elements */
1872             Py_DECREF(recycle);
1873             return 0;
1874         }
1875 
1876         /* A new slice is actually being assigned */
1877         seq = PySequence_Fast(value, "");
1878         if (!seq) {
1879             PyErr_Format(
1880                 PyExc_TypeError,
1881                 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1882                 );
1883             return -1;
1884         }
1885         newlen = PySequence_Fast_GET_SIZE(seq);
1886 
1887         if (step !=  1 && newlen != slicelen)
1888         {
1889             Py_DECREF(seq);
1890             PyErr_Format(PyExc_ValueError,
1891                 "attempt to assign sequence of size %zd "
1892                 "to extended slice of size %zd",
1893                 newlen, slicelen
1894                 );
1895             return -1;
1896         }
1897 
1898         /* Resize before creating the recycle bin, to prevent refleaks. */
1899         if (newlen > slicelen) {
1900             if (element_resize(self, newlen - slicelen) < 0) {
1901                 Py_DECREF(seq);
1902                 return -1;
1903             }
1904         }
1905 
1906         for (i = 0; i < newlen; i++) {
1907             PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1908             if (!Element_Check(element)) {
1909                 raise_type_error(element);
1910                 Py_DECREF(seq);
1911                 return -1;
1912             }
1913         }
1914 
1915         if (slicelen > 0) {
1916             /* to avoid recursive calls to this method (via decref), move
1917                old items to the recycle bin here, and get rid of them when
1918                we're done modifying the element */
1919             recycle = PyList_New(slicelen);
1920             if (!recycle) {
1921                 Py_DECREF(seq);
1922                 return -1;
1923             }
1924             for (cur = start, i = 0; i < slicelen;
1925                  cur += step, i++)
1926                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1927         }
1928 
1929         if (newlen < slicelen) {
1930             /* delete slice */
1931             for (i = stop; i < self->extra->length; i++)
1932                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1933         } else if (newlen > slicelen) {
1934             /* insert slice */
1935             for (i = self->extra->length-1; i >= stop; i--)
1936                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1937         }
1938 
1939         /* replace the slice */
1940         for (cur = start, i = 0; i < newlen;
1941              cur += step, i++) {
1942             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1943             Py_INCREF(element);
1944             self->extra->children[cur] = element;
1945         }
1946 
1947         self->extra->length += newlen - slicelen;
1948 
1949         Py_DECREF(seq);
1950 
1951         /* discard the recycle bin, and everything in it */
1952         Py_XDECREF(recycle);
1953 
1954         return 0;
1955     }
1956     else {
1957         PyErr_SetString(PyExc_TypeError,
1958                 "element indices must be integers");
1959         return -1;
1960     }
1961 }
1962 
1963 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1964 element_tag_getter(ElementObject *self, void *closure)
1965 {
1966     PyObject *res = self->tag;
1967     Py_INCREF(res);
1968     return res;
1969 }
1970 
1971 static PyObject*
element_text_getter(ElementObject * self,void * closure)1972 element_text_getter(ElementObject *self, void *closure)
1973 {
1974     PyObject *res = element_get_text(self);
1975     Py_XINCREF(res);
1976     return res;
1977 }
1978 
1979 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1980 element_tail_getter(ElementObject *self, void *closure)
1981 {
1982     PyObject *res = element_get_tail(self);
1983     Py_XINCREF(res);
1984     return res;
1985 }
1986 
1987 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)1988 element_attrib_getter(ElementObject *self, void *closure)
1989 {
1990     PyObject *res;
1991     if (!self->extra) {
1992         if (create_extra(self, NULL) < 0)
1993             return NULL;
1994     }
1995     res = element_get_attrib(self);
1996     Py_XINCREF(res);
1997     return res;
1998 }
1999 
/* Setter validation: reject a NULL value with AttributeError ("can't
   delete element attribute").

   NOTE: on failure this macro makes the *enclosing function* return -1, so
   it may only be used inside functions returning int.  The body is wrapped
   in do { } while (0) so that the macro expands to exactly one statement
   and stays correct when used in an unbraced if/else. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2008 
2009 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2010 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2011 {
2012     _VALIDATE_ATTR_VALUE(value);
2013     Py_INCREF(value);
2014     Py_SETREF(self->tag, value);
2015     return 0;
2016 }
2017 
2018 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2019 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2020 {
2021     _VALIDATE_ATTR_VALUE(value);
2022     Py_INCREF(value);
2023     _set_joined_ptr(&self->text, value);
2024     return 0;
2025 }
2026 
2027 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2028 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2029 {
2030     _VALIDATE_ATTR_VALUE(value);
2031     Py_INCREF(value);
2032     _set_joined_ptr(&self->tail, value);
2033     return 0;
2034 }
2035 
2036 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2037 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2038 {
2039     _VALIDATE_ATTR_VALUE(value);
2040     if (!PyDict_Check(value)) {
2041         PyErr_Format(PyExc_TypeError,
2042                      "attrib must be dict, not %.200s",
2043                      value->ob_type->tp_name);
2044         return -1;
2045     }
2046     if (!self->extra) {
2047         if (create_extra(self, NULL) < 0)
2048             return -1;
2049     }
2050     Py_INCREF(value);
2051     Py_XSETREF(self->extra->attrib, value);
2052     return 0;
2053 }
2054 
2055 static PySequenceMethods element_as_sequence = {
2056     (lenfunc) element_length,
2057     0, /* sq_concat */
2058     0, /* sq_repeat */
2059     element_getitem,
2060     0,
2061     element_setitem,
2062     0,
2063 };
2064 
2065 /******************************* Element iterator ****************************/
2066 
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    /* Element whose children are being walked; the stack entry holds a
       reference to it (taken in parent_stack_push_new). */
    ElementObject *parent;
    /* Index into parent's children of the next child to visit. */
    Py_ssize_t child_index;
} ParentLocator;
2079 
/* Iterator state shared by the element iterator and the text iterator
   (see the gettext flag). */
typedef struct {
    PyObject_HEAD
    /* Array of parents still being traversed (allocated with PyMem_New,
       grown with PyMem_Resize, released with PyMem_Free). */
    ParentLocator *parent_stack;
    /* Number of entries of parent_stack currently in use. */
    Py_ssize_t parent_stack_used;
    /* Number of entries parent_stack has room for. */
    Py_ssize_t parent_stack_size;
    /* Element the iteration starts from; set to NULL by elementiter_next
       once it has been consumed. */
    ElementObject *root_element;
    /* Tag that yielded elements must compare equal to; Py_None means every
       element is yielded. */
    PyObject *sought_tag;
    /* Non-zero: yield text/tail strings instead of elements. */
    int gettext;
} ElementIterObject;
2089 
2090 
2091 static void
elementiter_dealloc(ElementIterObject * it)2092 elementiter_dealloc(ElementIterObject *it)
2093 {
2094     Py_ssize_t i = it->parent_stack_used;
2095     it->parent_stack_used = 0;
2096     /* bpo-31095: UnTrack is needed before calling any callbacks */
2097     PyObject_GC_UnTrack(it);
2098     while (i--)
2099         Py_XDECREF(it->parent_stack[i].parent);
2100     PyMem_Free(it->parent_stack);
2101 
2102     Py_XDECREF(it->sought_tag);
2103     Py_XDECREF(it->root_element);
2104 
2105     PyObject_GC_Del(it);
2106 }
2107 
2108 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2109 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2110 {
2111     Py_ssize_t i = it->parent_stack_used;
2112     while (i--)
2113         Py_VISIT(it->parent_stack[i].parent);
2114 
2115     Py_VISIT(it->root_element);
2116     Py_VISIT(it->sought_tag);
2117     return 0;
2118 }
2119 
2120 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2121  */
2122 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2123 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2124 {
2125     ParentLocator *item;
2126 
2127     if (it->parent_stack_used >= it->parent_stack_size) {
2128         Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2129         ParentLocator *parent_stack = it->parent_stack;
2130         PyMem_Resize(parent_stack, ParentLocator, new_size);
2131         if (parent_stack == NULL)
2132             return -1;
2133         it->parent_stack = parent_stack;
2134         it->parent_stack_size = new_size;
2135     }
2136     item = it->parent_stack + it->parent_stack_used++;
2137     Py_INCREF(parent);
2138     item->parent = parent;
2139     item->child_index = 0;
2140     return 0;
2141 }
2142 
2143 static PyObject *
elementiter_next(ElementIterObject * it)2144 elementiter_next(ElementIterObject *it)
2145 {
2146     /* Sub-element iterator.
2147      *
2148      * A short note on gettext: this function serves both the iter() and
2149      * itertext() methods to avoid code duplication. However, there are a few
2150      * small differences in the way these iterations work. Namely:
2151      *   - itertext() only yields text from nodes that have it, and continues
2152      *     iterating when a node doesn't have text (so it doesn't return any
2153      *     node like iter())
2154      *   - itertext() also has to handle tail, after finishing with all the
2155      *     children of a node.
2156      */
2157     int rc;
2158     ElementObject *elem;
2159     PyObject *text;
2160 
2161     while (1) {
2162         /* Handle the case reached in the beginning and end of iteration, where
2163          * the parent stack is empty. If root_element is NULL and we're here, the
2164          * iterator is exhausted.
2165          */
2166         if (!it->parent_stack_used) {
2167             if (!it->root_element) {
2168                 PyErr_SetNone(PyExc_StopIteration);
2169                 return NULL;
2170             }
2171 
2172             elem = it->root_element;  /* steals a reference */
2173             it->root_element = NULL;
2174         }
2175         else {
2176             /* See if there are children left to traverse in the current parent. If
2177              * yes, visit the next child. If not, pop the stack and try again.
2178              */
2179             ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2180             Py_ssize_t child_index = item->child_index;
2181             ElementObjectExtra *extra;
2182             elem = item->parent;
2183             extra = elem->extra;
2184             if (!extra || child_index >= extra->length) {
2185                 it->parent_stack_used--;
2186                 /* Note that extra condition on it->parent_stack_used here;
2187                  * this is because itertext() is supposed to only return *inner*
2188                  * text, not text following the element it began iteration with.
2189                  */
2190                 if (it->gettext && it->parent_stack_used) {
2191                     text = element_get_tail(elem);
2192                     goto gettext;
2193                 }
2194                 Py_DECREF(elem);
2195                 continue;
2196             }
2197 
2198             assert(Element_Check(extra->children[child_index]));
2199             elem = (ElementObject *)extra->children[child_index];
2200             item->child_index++;
2201             Py_INCREF(elem);
2202         }
2203 
2204         if (parent_stack_push_new(it, elem) < 0) {
2205             Py_DECREF(elem);
2206             PyErr_NoMemory();
2207             return NULL;
2208         }
2209         if (it->gettext) {
2210             text = element_get_text(elem);
2211             goto gettext;
2212         }
2213 
2214         if (it->sought_tag == Py_None)
2215             return (PyObject *)elem;
2216 
2217         rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2218         if (rc > 0)
2219             return (PyObject *)elem;
2220 
2221         Py_DECREF(elem);
2222         if (rc < 0)
2223             return NULL;
2224         continue;
2225 
2226 gettext:
2227         if (!text) {
2228             Py_DECREF(elem);
2229             return NULL;
2230         }
2231         if (text == Py_None) {
2232             Py_DECREF(elem);
2233         }
2234         else {
2235             Py_INCREF(text);
2236             Py_DECREF(elem);
2237             rc = PyObject_IsTrue(text);
2238             if (rc > 0)
2239                 return text;
2240             Py_DECREF(text);
2241             if (rc < 0)
2242                 return NULL;
2243         }
2244     }
2245 
2246     return NULL;
2247 }
2248 
2249 
2250 static PyTypeObject ElementIter_Type = {
2251     PyVarObject_HEAD_INIT(NULL, 0)
2252     /* Using the module's name since the pure-Python implementation does not
2253        have such a type. */
2254     "_elementtree._element_iterator",           /* tp_name */
2255     sizeof(ElementIterObject),                  /* tp_basicsize */
2256     0,                                          /* tp_itemsize */
2257     /* methods */
2258     (destructor)elementiter_dealloc,            /* tp_dealloc */
2259     0,                                          /* tp_vectorcall_offset */
2260     0,                                          /* tp_getattr */
2261     0,                                          /* tp_setattr */
2262     0,                                          /* tp_as_async */
2263     0,                                          /* tp_repr */
2264     0,                                          /* tp_as_number */
2265     0,                                          /* tp_as_sequence */
2266     0,                                          /* tp_as_mapping */
2267     0,                                          /* tp_hash */
2268     0,                                          /* tp_call */
2269     0,                                          /* tp_str */
2270     0,                                          /* tp_getattro */
2271     0,                                          /* tp_setattro */
2272     0,                                          /* tp_as_buffer */
2273     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
2274     0,                                          /* tp_doc */
2275     (traverseproc)elementiter_traverse,         /* tp_traverse */
2276     0,                                          /* tp_clear */
2277     0,                                          /* tp_richcompare */
2278     0,                                          /* tp_weaklistoffset */
2279     PyObject_SelfIter,                          /* tp_iter */
2280     (iternextfunc)elementiter_next,             /* tp_iternext */
2281     0,                                          /* tp_methods */
2282     0,                                          /* tp_members */
2283     0,                                          /* tp_getset */
2284     0,                                          /* tp_base */
2285     0,                                          /* tp_dict */
2286     0,                                          /* tp_descr_get */
2287     0,                                          /* tp_descr_set */
2288     0,                                          /* tp_dictoffset */
2289     0,                                          /* tp_init */
2290     0,                                          /* tp_alloc */
2291     0,                                          /* tp_new */
2292 };
2293 
2294 #define INIT_PARENT_STACK_SIZE 8
2295 
2296 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2297 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2298 {
2299     ElementIterObject *it;
2300 
2301     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2302     if (!it)
2303         return NULL;
2304 
2305     Py_INCREF(tag);
2306     it->sought_tag = tag;
2307     it->gettext = gettext;
2308     Py_INCREF(self);
2309     it->root_element = self;
2310 
2311     it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2312     if (it->parent_stack == NULL) {
2313         Py_DECREF(it);
2314         PyErr_NoMemory();
2315         return NULL;
2316     }
2317     it->parent_stack_used = 0;
2318     it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2319 
2320     PyObject_GC_Track(it);
2321 
2322     return (PyObject *)it;
2323 }
2324 
2325 
2326 /* ==================================================================== */
2327 /* the tree builder type */
2328 
/* State of a tree builder: the tree under construction, the stack of
   currently open elements, pending character data, the factories used to
   create nodes and the optional event-recording hooks. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Node factories.  element_factory is called with (tag, attrib);
       when it is NULL, create_new_element() is used instead.
       comment_factory is called with the comment text; when it is NULL the
       text itself stands in for the comment.  pi_factory is called with
       (target, text); when it is NULL a (target, text) tuple is used. */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* Non-zero: nodes produced by comment_factory / pi_factory are appended
       to the current node. */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True if op's type is exactly TreeBuilder_Type (subclasses do not match). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2361 
2362 /* -------------------------------------------------------------------- */
2363 /* constructor and destructor */
2364 
2365 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2366 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2367 {
2368     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2369     if (t != NULL) {
2370         t->root = NULL;
2371 
2372         Py_INCREF(Py_None);
2373         t->this = Py_None;
2374         Py_INCREF(Py_None);
2375         t->last = Py_None;
2376 
2377         t->data = NULL;
2378         t->element_factory = NULL;
2379         t->comment_factory = NULL;
2380         t->pi_factory = NULL;
2381         t->stack = PyList_New(20);
2382         if (!t->stack) {
2383             Py_DECREF(t->this);
2384             Py_DECREF(t->last);
2385             Py_DECREF((PyObject *) t);
2386             return NULL;
2387         }
2388         t->index = 0;
2389 
2390         t->events_append = NULL;
2391         t->start_event_obj = t->end_event_obj = NULL;
2392         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2393         t->comment_event_obj = t->pi_event_obj = NULL;
2394         t->insert_comments = t->insert_pis = 0;
2395     }
2396     return (PyObject *)t;
2397 }
2398 
2399 /*[clinic input]
2400 _elementtree.TreeBuilder.__init__
2401 
2402     element_factory: object = None
2403     *
2404     comment_factory: object = None
2405     pi_factory: object = None
2406     insert_comments: bool = False
2407     insert_pis: bool = False
2408 
2409 [clinic start generated code]*/
2410 
2411 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2412 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2413                                        PyObject *element_factory,
2414                                        PyObject *comment_factory,
2415                                        PyObject *pi_factory,
2416                                        int insert_comments, int insert_pis)
2417 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2418 {
2419     if (element_factory != Py_None) {
2420         Py_INCREF(element_factory);
2421         Py_XSETREF(self->element_factory, element_factory);
2422     } else {
2423         Py_CLEAR(self->element_factory);
2424     }
2425 
2426     if (comment_factory == Py_None) {
2427         elementtreestate *st = ET_STATE_GLOBAL;
2428         comment_factory = st->comment_factory;
2429     }
2430     if (comment_factory) {
2431         Py_INCREF(comment_factory);
2432         Py_XSETREF(self->comment_factory, comment_factory);
2433         self->insert_comments = insert_comments;
2434     } else {
2435         Py_CLEAR(self->comment_factory);
2436         self->insert_comments = 0;
2437     }
2438 
2439     if (pi_factory == Py_None) {
2440         elementtreestate *st = ET_STATE_GLOBAL;
2441         pi_factory = st->pi_factory;
2442     }
2443     if (pi_factory) {
2444         Py_INCREF(pi_factory);
2445         Py_XSETREF(self->pi_factory, pi_factory);
2446         self->insert_pis = insert_pis;
2447     } else {
2448         Py_CLEAR(self->pi_factory);
2449         self->insert_pis = 0;
2450     }
2451 
2452     return 0;
2453 }
2454 
/* GC traversal for the tree builder: reports every object reference the
   builder holds to `visit`.  Py_VISIT skips NULL members and makes this
   function return early with visit's result if that result is non-zero;
   otherwise 0 is returned. */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    /* event hooks */
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    /* factories */
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
2476 
/* Drop every object reference held by the tree builder.  Py_CLEAR sets each
   member to NULL before releasing it, so calling this more than once is
   harmless.  Also used by treebuilder_dealloc().  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event hooks */
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    /* factories, then the root of the tree */
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
2498 
2499 static void
treebuilder_dealloc(TreeBuilderObject * self)2500 treebuilder_dealloc(TreeBuilderObject *self)
2501 {
2502     PyObject_GC_UnTrack(self);
2503     treebuilder_gc_clear(self);
2504     Py_TYPE(self)->tp_free((PyObject *)self);
2505 }
2506 
2507 /* -------------------------------------------------------------------- */
2508 /* helpers for handling of arbitrary element-like objects */
2509 
2510 /*[clinic input]
2511 _elementtree._set_factories
2512 
2513     comment_factory: object
2514     pi_factory: object
2515     /
2516 
2517 Change the factories used to create comments and processing instructions.
2518 
2519 For internal use only.
2520 [clinic start generated code]*/
2521 
2522 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2523 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2524                                  PyObject *pi_factory)
2525 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2526 {
2527     elementtreestate *st = ET_STATE_GLOBAL;
2528     PyObject *old;
2529 
2530     if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2531         PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2532                      Py_TYPE(comment_factory)->tp_name);
2533         return NULL;
2534     }
2535     if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2536         PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2537                      Py_TYPE(pi_factory)->tp_name);
2538         return NULL;
2539     }
2540 
2541     old = PyTuple_Pack(2,
2542         st->comment_factory ? st->comment_factory : Py_None,
2543         st->pi_factory ? st->pi_factory : Py_None);
2544 
2545     if (comment_factory == Py_None) {
2546         Py_CLEAR(st->comment_factory);
2547     } else {
2548         Py_INCREF(comment_factory);
2549         Py_XSETREF(st->comment_factory, comment_factory);
2550     }
2551     if (pi_factory == Py_None) {
2552         Py_CLEAR(st->pi_factory);
2553     } else {
2554         Py_INCREF(pi_factory);
2555         Py_XSETREF(st->pi_factory, pi_factory);
2556     }
2557 
2558     return old;
2559 }
2560 
2561 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2562 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2563                                         PyObject **dest, _Py_Identifier *name)
2564 {
2565     /* Fast paths for the "almost always" cases. */
2566     if (Element_CheckExact(element)) {
2567         PyObject *dest_obj = JOIN_OBJ(*dest);
2568         if (dest_obj == Py_None) {
2569             *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2570             *data = NULL;
2571             Py_DECREF(dest_obj);
2572             return 0;
2573         }
2574         else if (JOIN_GET(*dest)) {
2575             if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2576                 return -1;
2577             }
2578             Py_CLEAR(*data);
2579             return 0;
2580         }
2581     }
2582 
2583     /*  Fallback for the non-Element / non-trivial cases. */
2584     {
2585         int r;
2586         PyObject* joined;
2587         PyObject* previous = _PyObject_GetAttrId(element, name);
2588         if (!previous)
2589             return -1;
2590         joined = list_join(*data);
2591         if (!joined) {
2592             Py_DECREF(previous);
2593             return -1;
2594         }
2595         if (previous != Py_None) {
2596             PyObject *tmp = PyNumber_Add(previous, joined);
2597             Py_DECREF(joined);
2598             Py_DECREF(previous);
2599             if (!tmp)
2600                 return -1;
2601             joined = tmp;
2602         } else {
2603             Py_DECREF(previous);
2604         }
2605 
2606         r = _PyObject_SetAttrId(element, name, joined);
2607         Py_DECREF(joined);
2608         if (r < 0)
2609             return -1;
2610         Py_CLEAR(*data);
2611         return 0;
2612     }
2613 }
2614 
2615 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2616 treebuilder_flush_data(TreeBuilderObject* self)
2617 {
2618     if (!self->data) {
2619         return 0;
2620     }
2621 
2622     if (!self->last_for_tail) {
2623         PyObject *element = self->last;
2624         _Py_IDENTIFIER(text);
2625         return treebuilder_extend_element_text_or_tail(
2626                 element, &self->data,
2627                 &((ElementObject *) element)->text, &PyId_text);
2628     }
2629     else {
2630         PyObject *element = self->last_for_tail;
2631         _Py_IDENTIFIER(tail);
2632         return treebuilder_extend_element_text_or_tail(
2633                 element, &self->data,
2634                 &((ElementObject *) element)->tail, &PyId_tail);
2635     }
2636 }
2637 
2638 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2639 treebuilder_add_subelement(PyObject *element, PyObject *child)
2640 {
2641     _Py_IDENTIFIER(append);
2642     if (Element_CheckExact(element)) {
2643         ElementObject *elem = (ElementObject *) element;
2644         return element_add_subelement(elem, child);
2645     }
2646     else {
2647         PyObject *res;
2648         res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2649         if (res == NULL)
2650             return -1;
2651         Py_DECREF(res);
2652         return 0;
2653     }
2654 }
2655 
2656 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2657 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2658                          PyObject *node)
2659 {
2660     if (action != NULL) {
2661         PyObject *res;
2662         PyObject *event = PyTuple_Pack(2, action, node);
2663         if (event == NULL)
2664             return -1;
2665         res = PyObject_CallOneArg(self->events_append, event);
2666         Py_DECREF(event);
2667         if (res == NULL)
2668             return -1;
2669         Py_DECREF(res);
2670     }
2671     return 0;
2672 }
2673 
2674 /* -------------------------------------------------------------------- */
2675 /* handlers */
2676 
2677 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2678 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2679                          PyObject* attrib)
2680 {
2681     PyObject* node;
2682     PyObject* this;
2683     elementtreestate *st = ET_STATE_GLOBAL;
2684 
2685     if (treebuilder_flush_data(self) < 0) {
2686         return NULL;
2687     }
2688 
2689     if (!self->element_factory) {
2690         node = create_new_element(tag, attrib);
2691     } else if (attrib == NULL) {
2692         attrib = PyDict_New();
2693         if (!attrib)
2694             return NULL;
2695         node = PyObject_CallFunctionObjArgs(self->element_factory,
2696                                             tag, attrib, NULL);
2697         Py_DECREF(attrib);
2698     }
2699     else {
2700         node = PyObject_CallFunctionObjArgs(self->element_factory,
2701                                             tag, attrib, NULL);
2702     }
2703     if (!node) {
2704         return NULL;
2705     }
2706 
2707     this = self->this;
2708     Py_CLEAR(self->last_for_tail);
2709 
2710     if (this != Py_None) {
2711         if (treebuilder_add_subelement(this, node) < 0)
2712             goto error;
2713     } else {
2714         if (self->root) {
2715             PyErr_SetString(
2716                 st->parseerror_obj,
2717                 "multiple elements on top level"
2718                 );
2719             goto error;
2720         }
2721         Py_INCREF(node);
2722         self->root = node;
2723     }
2724 
2725     if (self->index < PyList_GET_SIZE(self->stack)) {
2726         if (PyList_SetItem(self->stack, self->index, this) < 0)
2727             goto error;
2728         Py_INCREF(this);
2729     } else {
2730         if (PyList_Append(self->stack, this) < 0)
2731             goto error;
2732     }
2733     self->index++;
2734 
2735     Py_INCREF(node);
2736     Py_SETREF(self->this, node);
2737     Py_INCREF(node);
2738     Py_SETREF(self->last, node);
2739 
2740     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2741         goto error;
2742 
2743     return node;
2744 
2745   error:
2746     Py_DECREF(node);
2747     return NULL;
2748 }
2749 
2750 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2751 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2752 {
2753     if (!self->data) {
2754         if (self->last == Py_None) {
2755             /* ignore calls to data before the first call to start */
2756             Py_RETURN_NONE;
2757         }
2758         /* store the first item as is */
2759         Py_INCREF(data); self->data = data;
2760     } else {
2761         /* more than one item; use a list to collect items */
2762         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2763             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2764             /* XXX this code path unused in Python 3? */
2765             /* expat often generates single character data sections; handle
2766                the most common case by resizing the existing string... */
2767             Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2768             if (_PyBytes_Resize(&self->data, size + 1) < 0)
2769                 return NULL;
2770             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2771         } else if (PyList_CheckExact(self->data)) {
2772             if (PyList_Append(self->data, data) < 0)
2773                 return NULL;
2774         } else {
2775             PyObject* list = PyList_New(2);
2776             if (!list)
2777                 return NULL;
2778             PyList_SET_ITEM(list, 0, self->data);
2779             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2780             self->data = list;
2781         }
2782     }
2783 
2784     Py_RETURN_NONE;
2785 }
2786 
2787 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2788 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2789 {
2790     PyObject* item;
2791 
2792     if (treebuilder_flush_data(self) < 0) {
2793         return NULL;
2794     }
2795 
2796     if (self->index == 0) {
2797         PyErr_SetString(
2798             PyExc_IndexError,
2799             "pop from empty stack"
2800             );
2801         return NULL;
2802     }
2803 
2804     item = self->last;
2805     self->last = self->this;
2806     Py_INCREF(self->last);
2807     Py_XSETREF(self->last_for_tail, self->last);
2808     self->index--;
2809     self->this = PyList_GET_ITEM(self->stack, self->index);
2810     Py_INCREF(self->this);
2811     Py_DECREF(item);
2812 
2813     if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2814         return NULL;
2815 
2816     Py_INCREF(self->last);
2817     return (PyObject*) self->last;
2818 }
2819 
2820 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2821 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2822 {
2823     PyObject* comment;
2824     PyObject* this;
2825 
2826     if (treebuilder_flush_data(self) < 0) {
2827         return NULL;
2828     }
2829 
2830     if (self->comment_factory) {
2831         comment = PyObject_CallOneArg(self->comment_factory, text);
2832         if (!comment)
2833             return NULL;
2834 
2835         this = self->this;
2836         if (self->insert_comments && this != Py_None) {
2837             if (treebuilder_add_subelement(this, comment) < 0)
2838                 goto error;
2839             Py_INCREF(comment);
2840             Py_XSETREF(self->last_for_tail, comment);
2841         }
2842     } else {
2843         Py_INCREF(text);
2844         comment = text;
2845     }
2846 
2847     if (self->events_append && self->comment_event_obj) {
2848         if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2849             goto error;
2850     }
2851 
2852     return comment;
2853 
2854   error:
2855     Py_DECREF(comment);
2856     return NULL;
2857 }
2858 
2859 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2860 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2861 {
2862     PyObject* pi;
2863     PyObject* this;
2864     PyObject* stack[2] = {target, text};
2865 
2866     if (treebuilder_flush_data(self) < 0) {
2867         return NULL;
2868     }
2869 
2870     if (self->pi_factory) {
2871         pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2872         if (!pi) {
2873             return NULL;
2874         }
2875 
2876         this = self->this;
2877         if (self->insert_pis && this != Py_None) {
2878             if (treebuilder_add_subelement(this, pi) < 0)
2879                 goto error;
2880             Py_INCREF(pi);
2881             Py_XSETREF(self->last_for_tail, pi);
2882         }
2883     } else {
2884         pi = PyTuple_Pack(2, target, text);
2885         if (!pi) {
2886             return NULL;
2887         }
2888     }
2889 
2890     if (self->events_append && self->pi_event_obj) {
2891         if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2892             goto error;
2893     }
2894 
2895     return pi;
2896 
2897   error:
2898     Py_DECREF(pi);
2899     return NULL;
2900 }
2901 
2902 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2903 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2904 {
2905     PyObject* parcel;
2906 
2907     if (self->events_append && self->start_ns_event_obj) {
2908         parcel = PyTuple_Pack(2, prefix, uri);
2909         if (!parcel) {
2910             return NULL;
2911         }
2912 
2913         if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2914             Py_DECREF(parcel);
2915             return NULL;
2916         }
2917         Py_DECREF(parcel);
2918     }
2919 
2920     Py_RETURN_NONE;
2921 }
2922 
2923 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2924 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2925 {
2926     if (self->events_append && self->end_ns_event_obj) {
2927         if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2928             return NULL;
2929         }
2930     }
2931 
2932     Py_RETURN_NONE;
2933 }
2934 
2935 /* -------------------------------------------------------------------- */
2936 /* methods (in alphabetical order) */
2937 
2938 /*[clinic input]
2939 _elementtree.TreeBuilder.data
2940 
2941     data: object
2942     /
2943 
2944 [clinic start generated code]*/
2945 
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* TreeBuilder.data(data): forwards to treebuilder_handle_data() and
       returns its result unchanged. */
    return treebuilder_handle_data(self, data);
}
2952 
2953 /*[clinic input]
2954 _elementtree.TreeBuilder.end
2955 
2956     tag: object
2957     /
2958 
2959 [clinic start generated code]*/
2960 
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* TreeBuilder.end(tag): forwards to treebuilder_handle_end() and
       returns its result unchanged. */
    return treebuilder_handle_end(self, tag);
}
2967 
2968 /*[clinic input]
2969 _elementtree.TreeBuilder.comment
2970 
2971     text: object
2972     /
2973 
2974 [clinic start generated code]*/
2975 
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
    /* TreeBuilder.comment(text): forwards to treebuilder_handle_comment()
       and returns its result unchanged. */
    return treebuilder_handle_comment(self, text);
}
2982 
2983 /*[clinic input]
2984 _elementtree.TreeBuilder.pi
2985 
2986     target: object
2987     text: object = None
2988     /
2989 
2990 [clinic start generated code]*/
2991 
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
                                 PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
    /* TreeBuilder.pi(target, text=None): forwards to
       treebuilder_handle_pi() and returns its result unchanged. */
    return treebuilder_handle_pi(self, target, text);
}
2999 
3000 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)3001 treebuilder_done(TreeBuilderObject* self)
3002 {
3003     PyObject* res;
3004 
3005     /* FIXME: check stack size? */
3006 
3007     if (self->root)
3008         res = self->root;
3009     else
3010         res = Py_None;
3011 
3012     Py_INCREF(res);
3013     return res;
3014 }
3015 
3016 /*[clinic input]
3017 _elementtree.TreeBuilder.close
3018 
3019 [clinic start generated code]*/
3020 
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* TreeBuilder.close(): forwards to treebuilder_done(), which returns
       the root element or None. */
    return treebuilder_done(self);
}
3027 
3028 /*[clinic input]
3029 _elementtree.TreeBuilder.start
3030 
3031     tag: object
3032     attrs: object(subclass_of='&PyDict_Type')
3033     /
3034 
3035 [clinic start generated code]*/
3036 
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
{
    /* TreeBuilder.start(tag, attrs): attrs is declared as a dict in the
       clinic input; forwards to treebuilder_handle_start() and returns
       its result unchanged. */
    return treebuilder_handle_start(self, tag, attrs);
}
3044 
3045 /* ==================================================================== */
3046 /* the expat interface */
3047 
3048 #include "expat.h"
3049 #include "pyexpat.h"
3050 
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Reach an expat entry point through the cached table:
 * EXPAT(Parse) expands to (expat_capi->Parse). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite passed to ParserCreate_MM when a parser is created, so
 * that the parser allocates through PyObject_Malloc/Realloc/Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3059 
/* Instance layout of the XMLParser type.  All PyObject* fields start out
 * NULL (see xmlparser_new) and are filled in by __init__. */
typedef struct {
    PyObject_HEAD

    /* expat parser handle; created in __init__, freed in
       xmlparser_gc_clear */
    XML_Parser parser;

    /* object that receives the parse events; a new TreeBuilder when the
       caller did not supply one */
    PyObject *target;
    /* dict consulted by expat_default_handler to resolve "&name;"
       references */
    PyObject *entity;

    /* dict cache used by makeuniversal: raw UTF-8 name (bytes) -> decoded
       universal name (str) */
    PyObject *names;

    /* Attributes looked up on the target in __init__ ("start_ns",
       "end_ns", "start", "data", "end"); NULL when the target lacks
       the attribute. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* likewise for "comment", "pi" and "doctype" */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* likewise for "close" */
    PyObject *handle_close;

} XMLParserObject;
3083 
3084 /* helpers */
3085 
3086 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3087 makeuniversal(XMLParserObject* self, const char* string)
3088 {
3089     /* convert a UTF-8 tag/attribute name from the expat parser
3090        to a universal name string */
3091 
3092     Py_ssize_t size = (Py_ssize_t) strlen(string);
3093     PyObject* key;
3094     PyObject* value;
3095 
3096     /* look the 'raw' name up in the names dictionary */
3097     key = PyBytes_FromStringAndSize(string, size);
3098     if (!key)
3099         return NULL;
3100 
3101     value = PyDict_GetItemWithError(self->names, key);
3102 
3103     if (value) {
3104         Py_INCREF(value);
3105     }
3106     else if (!PyErr_Occurred()) {
3107         /* new name.  convert to universal name, and decode as
3108            necessary */
3109 
3110         PyObject* tag;
3111         char* p;
3112         Py_ssize_t i;
3113 
3114         /* look for namespace separator */
3115         for (i = 0; i < size; i++)
3116             if (string[i] == '}')
3117                 break;
3118         if (i != size) {
3119             /* convert to universal name */
3120             tag = PyBytes_FromStringAndSize(NULL, size+1);
3121             if (tag == NULL) {
3122                 Py_DECREF(key);
3123                 return NULL;
3124             }
3125             p = PyBytes_AS_STRING(tag);
3126             p[0] = '{';
3127             memcpy(p+1, string, size);
3128             size++;
3129         } else {
3130             /* plain name; use key as tag */
3131             Py_INCREF(key);
3132             tag = key;
3133         }
3134 
3135         /* decode universal name */
3136         p = PyBytes_AS_STRING(tag);
3137         value = PyUnicode_DecodeUTF8(p, size, "strict");
3138         Py_DECREF(tag);
3139         if (!value) {
3140             Py_DECREF(key);
3141             return NULL;
3142         }
3143 
3144         /* add to names dictionary */
3145         if (PyDict_SetItem(self->names, key, value) < 0) {
3146             Py_DECREF(key);
3147             Py_DECREF(value);
3148             return NULL;
3149         }
3150     }
3151 
3152     Py_DECREF(key);
3153     return value;
3154 }
3155 
3156 /* Set the ParseError exception with the given parameters.
3157  * If message is not NULL, it's used as the error string. Otherwise, the
3158  * message string is the default for the given error_code.
3159 */
3160 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3161 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3162                 const char *message)
3163 {
3164     PyObject *errmsg, *error, *position, *code;
3165     elementtreestate *st = ET_STATE_GLOBAL;
3166 
3167     errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3168                 message ? message : EXPAT(ErrorString)(error_code),
3169                 line, column);
3170     if (errmsg == NULL)
3171         return;
3172 
3173     error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3174     Py_DECREF(errmsg);
3175     if (!error)
3176         return;
3177 
3178     /* Add code and position attributes */
3179     code = PyLong_FromLong((long)error_code);
3180     if (!code) {
3181         Py_DECREF(error);
3182         return;
3183     }
3184     if (PyObject_SetAttrString(error, "code", code) == -1) {
3185         Py_DECREF(error);
3186         Py_DECREF(code);
3187         return;
3188     }
3189     Py_DECREF(code);
3190 
3191     position = Py_BuildValue("(nn)", line, column);
3192     if (!position) {
3193         Py_DECREF(error);
3194         return;
3195     }
3196     if (PyObject_SetAttrString(error, "position", position) == -1) {
3197         Py_DECREF(error);
3198         Py_DECREF(position);
3199         return;
3200     }
3201     Py_DECREF(position);
3202 
3203     PyErr_SetObject(st->parseerror_obj, error);
3204     Py_DECREF(error);
3205 }
3206 
3207 /* -------------------------------------------------------------------- */
3208 /* handlers */
3209 
3210 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3211 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3212                       int data_len)
3213 {
3214     PyObject* key;
3215     PyObject* value;
3216     PyObject* res;
3217 
3218     if (data_len < 2 || data_in[0] != '&')
3219         return;
3220 
3221     if (PyErr_Occurred())
3222         return;
3223 
3224     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3225     if (!key)
3226         return;
3227 
3228     value = PyDict_GetItemWithError(self->entity, key);
3229 
3230     if (value) {
3231         if (TreeBuilder_CheckExact(self->target))
3232             res = treebuilder_handle_data(
3233                 (TreeBuilderObject*) self->target, value
3234                 );
3235         else if (self->handle_data)
3236             res = PyObject_CallOneArg(self->handle_data, value);
3237         else
3238             res = NULL;
3239         Py_XDECREF(res);
3240     } else if (!PyErr_Occurred()) {
3241         /* Report the first error, not the last */
3242         char message[128] = "undefined entity ";
3243         strncat(message, data_in, data_len < 100?data_len:100);
3244         expat_set_error(
3245             XML_ERROR_UNDEFINED_ENTITY,
3246             EXPAT(GetErrorLineNumber)(self->parser),
3247             EXPAT(GetErrorColumnNumber)(self->parser),
3248             message
3249             );
3250     }
3251 
3252     Py_DECREF(key);
3253 }
3254 
3255 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3256 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3257                     const XML_Char **attrib_in)
3258 {
3259     PyObject* res;
3260     PyObject* tag;
3261     PyObject* attrib;
3262     int ok;
3263 
3264     if (PyErr_Occurred())
3265         return;
3266 
3267     /* tag name */
3268     tag = makeuniversal(self, tag_in);
3269     if (!tag)
3270         return; /* parser will look for errors */
3271 
3272     /* attributes */
3273     if (attrib_in[0]) {
3274         attrib = PyDict_New();
3275         if (!attrib) {
3276             Py_DECREF(tag);
3277             return;
3278         }
3279         while (attrib_in[0] && attrib_in[1]) {
3280             PyObject* key = makeuniversal(self, attrib_in[0]);
3281             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3282             if (!key || !value) {
3283                 Py_XDECREF(value);
3284                 Py_XDECREF(key);
3285                 Py_DECREF(attrib);
3286                 Py_DECREF(tag);
3287                 return;
3288             }
3289             ok = PyDict_SetItem(attrib, key, value);
3290             Py_DECREF(value);
3291             Py_DECREF(key);
3292             if (ok < 0) {
3293                 Py_DECREF(attrib);
3294                 Py_DECREF(tag);
3295                 return;
3296             }
3297             attrib_in += 2;
3298         }
3299     } else {
3300         attrib = NULL;
3301     }
3302 
3303     if (TreeBuilder_CheckExact(self->target)) {
3304         /* shortcut */
3305         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3306                                        tag, attrib);
3307     }
3308     else if (self->handle_start) {
3309         if (attrib == NULL) {
3310             attrib = PyDict_New();
3311             if (!attrib) {
3312                 Py_DECREF(tag);
3313                 return;
3314             }
3315         }
3316         res = PyObject_CallFunctionObjArgs(self->handle_start,
3317                                            tag, attrib, NULL);
3318     } else
3319         res = NULL;
3320 
3321     Py_DECREF(tag);
3322     Py_XDECREF(attrib);
3323 
3324     Py_XDECREF(res);
3325 }
3326 
3327 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3328 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3329                    int data_len)
3330 {
3331     PyObject* data;
3332     PyObject* res;
3333 
3334     if (PyErr_Occurred())
3335         return;
3336 
3337     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3338     if (!data)
3339         return; /* parser will look for errors */
3340 
3341     if (TreeBuilder_CheckExact(self->target))
3342         /* shortcut */
3343         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3344     else if (self->handle_data)
3345         res = PyObject_CallOneArg(self->handle_data, data);
3346     else
3347         res = NULL;
3348 
3349     Py_DECREF(data);
3350 
3351     Py_XDECREF(res);
3352 }
3353 
3354 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3355 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3356 {
3357     PyObject* tag;
3358     PyObject* res = NULL;
3359 
3360     if (PyErr_Occurred())
3361         return;
3362 
3363     if (TreeBuilder_CheckExact(self->target))
3364         /* shortcut */
3365         /* the standard tree builder doesn't look at the end tag */
3366         res = treebuilder_handle_end(
3367             (TreeBuilderObject*) self->target, Py_None
3368             );
3369     else if (self->handle_end) {
3370         tag = makeuniversal(self, tag_in);
3371         if (tag) {
3372             res = PyObject_CallOneArg(self->handle_end, tag);
3373             Py_DECREF(tag);
3374         }
3375     }
3376 
3377     Py_XDECREF(res);
3378 }
3379 
3380 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3381 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3382                        const XML_Char *uri_in)
3383 {
3384     PyObject* res = NULL;
3385     PyObject* uri;
3386     PyObject* prefix;
3387     PyObject* stack[2];
3388 
3389     if (PyErr_Occurred())
3390         return;
3391 
3392     if (!uri_in)
3393         uri_in = "";
3394     if (!prefix_in)
3395         prefix_in = "";
3396 
3397     if (TreeBuilder_CheckExact(self->target)) {
3398         /* shortcut - TreeBuilder does not actually implement .start_ns() */
3399         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3400 
3401         if (target->events_append && target->start_ns_event_obj) {
3402             prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3403             if (!prefix)
3404                 return;
3405             uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3406             if (!uri) {
3407                 Py_DECREF(prefix);
3408                 return;
3409             }
3410 
3411             res = treebuilder_handle_start_ns(target, prefix, uri);
3412             Py_DECREF(uri);
3413             Py_DECREF(prefix);
3414         }
3415     } else if (self->handle_start_ns) {
3416         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3417         if (!prefix)
3418             return;
3419         uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3420         if (!uri) {
3421             Py_DECREF(prefix);
3422             return;
3423         }
3424 
3425         stack[0] = prefix;
3426         stack[1] = uri;
3427         res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3428         Py_DECREF(uri);
3429         Py_DECREF(prefix);
3430     }
3431 
3432     Py_XDECREF(res);
3433 }
3434 
3435 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3436 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3437 {
3438     PyObject *res = NULL;
3439     PyObject* prefix;
3440 
3441     if (PyErr_Occurred())
3442         return;
3443 
3444     if (!prefix_in)
3445         prefix_in = "";
3446 
3447     if (TreeBuilder_CheckExact(self->target)) {
3448         /* shortcut - TreeBuilder does not actually implement .end_ns() */
3449         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3450 
3451         if (target->events_append && target->end_ns_event_obj) {
3452             res = treebuilder_handle_end_ns(target, Py_None);
3453         }
3454     } else if (self->handle_end_ns) {
3455         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3456         if (!prefix)
3457             return;
3458 
3459         res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3460         Py_DECREF(prefix);
3461     }
3462 
3463     Py_XDECREF(res);
3464 }
3465 
3466 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3467 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3468 {
3469     PyObject* comment;
3470     PyObject* res;
3471 
3472     if (PyErr_Occurred())
3473         return;
3474 
3475     if (TreeBuilder_CheckExact(self->target)) {
3476         /* shortcut */
3477         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3478 
3479         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3480         if (!comment)
3481             return; /* parser will look for errors */
3482 
3483         res = treebuilder_handle_comment(target,  comment);
3484         Py_XDECREF(res);
3485         Py_DECREF(comment);
3486     } else if (self->handle_comment) {
3487         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3488         if (!comment)
3489             return;
3490 
3491         res = PyObject_CallOneArg(self->handle_comment, comment);
3492         Py_XDECREF(res);
3493         Py_DECREF(comment);
3494     }
3495 }
3496 
3497 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3498 expat_start_doctype_handler(XMLParserObject *self,
3499                             const XML_Char *doctype_name,
3500                             const XML_Char *sysid,
3501                             const XML_Char *pubid,
3502                             int has_internal_subset)
3503 {
3504     _Py_IDENTIFIER(doctype);
3505     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3506     PyObject *res;
3507 
3508     if (PyErr_Occurred())
3509         return;
3510 
3511     doctype_name_obj = makeuniversal(self, doctype_name);
3512     if (!doctype_name_obj)
3513         return;
3514 
3515     if (sysid) {
3516         sysid_obj = makeuniversal(self, sysid);
3517         if (!sysid_obj) {
3518             Py_DECREF(doctype_name_obj);
3519             return;
3520         }
3521     } else {
3522         Py_INCREF(Py_None);
3523         sysid_obj = Py_None;
3524     }
3525 
3526     if (pubid) {
3527         pubid_obj = makeuniversal(self, pubid);
3528         if (!pubid_obj) {
3529             Py_DECREF(doctype_name_obj);
3530             Py_DECREF(sysid_obj);
3531             return;
3532         }
3533     } else {
3534         Py_INCREF(Py_None);
3535         pubid_obj = Py_None;
3536     }
3537 
3538     /* If the target has a handler for doctype, call it. */
3539     if (self->handle_doctype) {
3540         res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3541                                            doctype_name_obj, pubid_obj,
3542                                            sysid_obj, NULL);
3543         Py_XDECREF(res);
3544     }
3545     else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3546         (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3547                 "The doctype() method of XMLParser is ignored.  "
3548                 "Define doctype() method on the TreeBuilder target.",
3549                 1);
3550         Py_DECREF(res);
3551     }
3552 
3553     Py_DECREF(doctype_name_obj);
3554     Py_DECREF(pubid_obj);
3555     Py_DECREF(sysid_obj);
3556 }
3557 
3558 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3559 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3560                  const XML_Char* data_in)
3561 {
3562     PyObject* pi_target;
3563     PyObject* data;
3564     PyObject* res;
3565     PyObject* stack[2];
3566 
3567     if (PyErr_Occurred())
3568         return;
3569 
3570     if (TreeBuilder_CheckExact(self->target)) {
3571         /* shortcut */
3572         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3573 
3574         if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3575             pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3576             if (!pi_target)
3577                 goto error;
3578             data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3579             if (!data)
3580                 goto error;
3581             res = treebuilder_handle_pi(target, pi_target, data);
3582             Py_XDECREF(res);
3583             Py_DECREF(data);
3584             Py_DECREF(pi_target);
3585         }
3586     } else if (self->handle_pi) {
3587         pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3588         if (!pi_target)
3589             goto error;
3590         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3591         if (!data)
3592             goto error;
3593 
3594         stack[0] = pi_target;
3595         stack[1] = data;
3596         res = _PyObject_FastCall(self->handle_pi, stack, 2);
3597         Py_XDECREF(res);
3598         Py_DECREF(data);
3599         Py_DECREF(pi_target);
3600     }
3601 
3602     return;
3603 
3604   error:
3605     Py_XDECREF(pi_target);
3606     return;
3607 }
3608 
3609 /* -------------------------------------------------------------------- */
3610 
3611 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3612 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3613 {
3614     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3615     if (self) {
3616         self->parser = NULL;
3617         self->target = self->entity = self->names = NULL;
3618         self->handle_start_ns = self->handle_end_ns = NULL;
3619         self->handle_start = self->handle_data = self->handle_end = NULL;
3620         self->handle_comment = self->handle_pi = self->handle_close = NULL;
3621         self->handle_doctype = NULL;
3622     }
3623     return (PyObject *)self;
3624 }
3625 
3626 static int
ignore_attribute_error(PyObject * value)3627 ignore_attribute_error(PyObject *value)
3628 {
3629     if (value == NULL) {
3630         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3631             return -1;
3632         }
3633         PyErr_Clear();
3634     }
3635     return 0;
3636 }
3637 
3638 /*[clinic input]
3639 _elementtree.XMLParser.__init__
3640 
3641     *
3642     target: object = NULL
3643     encoding: str(accept={str, NoneType}) = None
3644 
3645 [clinic start generated code]*/
3646 
static int
_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
                                     const char *encoding)
/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
{
    /* XMLParser.__init__(*, target=NULL, encoding=None).
     *
     * Sets up the entity and names dicts, creates the expat parser,
     * resolves the target (a fresh TreeBuilder when none is given),
     * looks up the target's optional handler attributes and registers
     * the expat callbacks.
     *
     * Returns 0 on success and -1 on failure.
     */
    self->entity = PyDict_New();
    if (!self->entity)
        return -1;

    self->names = PyDict_New();
    if (!self->names) {
        Py_CLEAR(self->entity);
        return -1;
    }

    /* "}" is passed as the third ParserCreate_MM argument; it is the
       character makeuniversal() looks for when splitting off a
       namespace.  NOTE(review): presumably expat's namespace
       separator -- confirm against the expat documentation. */
    self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
    if (!self->parser) {
        Py_CLEAR(self->entity);
        Py_CLEAR(self->names);
        PyErr_NoMemory();
        return -1;
    }
    /* expat < 2.1.0 has no XML_SetHashSalt() */
    if (EXPAT(SetHashSalt) != NULL) {
        EXPAT(SetHashSalt)(self->parser,
                           (unsigned long)_Py_HashSecret.expat.hashsalt);
    }

    if (target) {
        Py_INCREF(target);
    } else {
        target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
        if (!target) {
            /* self->parser is left in place here; xmlparser_gc_clear()
               frees it later */
            Py_CLEAR(self->entity);
            Py_CLEAR(self->names);
            return -1;
        }
    }
    /* self->target now owns the reference taken or created above */
    self->target = target;

    /* Optional handlers: a missing attribute leaves the field NULL
       (ignore_attribute_error() clears the AttributeError); any other
       exception aborts initialisation. */
    self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
    if (ignore_attribute_error(self->handle_start_ns)) {
        return -1;
    }
    self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
    if (ignore_attribute_error(self->handle_end_ns)) {
        return -1;
    }
    self->handle_start = PyObject_GetAttrString(target, "start");
    if (ignore_attribute_error(self->handle_start)) {
        return -1;
    }
    self->handle_data = PyObject_GetAttrString(target, "data");
    if (ignore_attribute_error(self->handle_data)) {
        return -1;
    }
    self->handle_end = PyObject_GetAttrString(target, "end");
    if (ignore_attribute_error(self->handle_end)) {
        return -1;
    }
    self->handle_comment = PyObject_GetAttrString(target, "comment");
    if (ignore_attribute_error(self->handle_comment)) {
        return -1;
    }
    self->handle_pi = PyObject_GetAttrString(target, "pi");
    if (ignore_attribute_error(self->handle_pi)) {
        return -1;
    }
    self->handle_close = PyObject_GetAttrString(target, "close");
    if (ignore_attribute_error(self->handle_close)) {
        return -1;
    }
    self->handle_doctype = PyObject_GetAttrString(target, "doctype");
    if (ignore_attribute_error(self->handle_doctype)) {
        return -1;
    }

    /* configure parser */
    /* the parser object itself is the user data, so every callback
       receives it as its first argument */
    EXPAT(SetUserData)(self->parser, self);
    /* namespace callbacks are only installed when the target exposes
       start_ns or end_ns */
    if (self->handle_start_ns || self->handle_end_ns)
        EXPAT(SetNamespaceDeclHandler)(
            self->parser,
            (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
            (XML_EndNamespaceDeclHandler) expat_end_ns_handler
            );
    EXPAT(SetElementHandler)(
        self->parser,
        (XML_StartElementHandler) expat_start_handler,
        (XML_EndElementHandler) expat_end_handler
        );
    EXPAT(SetDefaultHandlerExpand)(
        self->parser,
        (XML_DefaultHandler) expat_default_handler
        );
    EXPAT(SetCharacterDataHandler)(
        self->parser,
        (XML_CharacterDataHandler) expat_data_handler
        );
    /* comment and PI callbacks are only installed when the target has
       the corresponding attribute */
    if (self->handle_comment)
        EXPAT(SetCommentHandler)(
            self->parser,
            (XML_CommentHandler) expat_comment_handler
            );
    if (self->handle_pi)
        EXPAT(SetProcessingInstructionHandler)(
            self->parser,
            (XML_ProcessingInstructionHandler) expat_pi_handler
            );
    EXPAT(SetStartDoctypeDeclHandler)(
        self->parser,
        (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
        );
    EXPAT(SetUnknownEncodingHandler)(
        self->parser,
        EXPAT(DefaultUnknownEncodingHandler), NULL
        );

    return 0;
}
3766 
/* GC traversal: hand every PyObject* field of the parser (the handler
 * attributes, then target, entity and names) to the visit callback.
 * The expat parser handle is not a Python object and is not visited. */
static int
xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->handle_close);
    Py_VISIT(self->handle_pi);
    Py_VISIT(self->handle_comment);
    Py_VISIT(self->handle_end);
    Py_VISIT(self->handle_data);
    Py_VISIT(self->handle_start);
    Py_VISIT(self->handle_start_ns);
    Py_VISIT(self->handle_end_ns);
    Py_VISIT(self->handle_doctype);

    Py_VISIT(self->target);
    Py_VISIT(self->entity);
    Py_VISIT(self->names);

    return 0;
}
3786 
3787 static int
xmlparser_gc_clear(XMLParserObject * self)3788 xmlparser_gc_clear(XMLParserObject *self)
3789 {
3790     if (self->parser != NULL) {
3791         XML_Parser parser = self->parser;
3792         self->parser = NULL;
3793         EXPAT(ParserFree)(parser);
3794     }
3795 
3796     Py_CLEAR(self->handle_close);
3797     Py_CLEAR(self->handle_pi);
3798     Py_CLEAR(self->handle_comment);
3799     Py_CLEAR(self->handle_end);
3800     Py_CLEAR(self->handle_data);
3801     Py_CLEAR(self->handle_start);
3802     Py_CLEAR(self->handle_start_ns);
3803     Py_CLEAR(self->handle_end_ns);
3804     Py_CLEAR(self->handle_doctype);
3805 
3806     Py_CLEAR(self->target);
3807     Py_CLEAR(self->entity);
3808     Py_CLEAR(self->names);
3809 
3810     return 0;
3811 }
3812 
3813 static void
xmlparser_dealloc(XMLParserObject * self)3814 xmlparser_dealloc(XMLParserObject* self)
3815 {
3816     PyObject_GC_UnTrack(self);
3817     xmlparser_gc_clear(self);
3818     Py_TYPE(self)->tp_free((PyObject *)self);
3819 }
3820 
3821 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3822 _check_xmlparser(XMLParserObject* self)
3823 {
3824     if (self->target == NULL) {
3825         PyErr_SetString(PyExc_ValueError,
3826                         "XMLParser.__init__() wasn't called");
3827         return 0;
3828     }
3829     return 1;
3830 }
3831 
3832 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3833 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3834 {
3835     int ok;
3836 
3837     assert(!PyErr_Occurred());
3838     ok = EXPAT(Parse)(self->parser, data, data_len, final);
3839 
3840     if (PyErr_Occurred())
3841         return NULL;
3842 
3843     if (!ok) {
3844         expat_set_error(
3845             EXPAT(GetErrorCode)(self->parser),
3846             EXPAT(GetErrorLineNumber)(self->parser),
3847             EXPAT(GetErrorColumnNumber)(self->parser),
3848             NULL
3849             );
3850         return NULL;
3851     }
3852 
3853     Py_RETURN_NONE;
3854 }
3855 
3856 /*[clinic input]
3857 _elementtree.XMLParser.close
3858 
3859 [clinic start generated code]*/
3860 
3861 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3862 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3863 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3864 {
3865     /* end feeding data to parser */
3866 
3867     PyObject* res;
3868 
3869     if (!_check_xmlparser(self)) {
3870         return NULL;
3871     }
3872     res = expat_parse(self, "", 0, 1);
3873     if (!res)
3874         return NULL;
3875 
3876     if (TreeBuilder_CheckExact(self->target)) {
3877         Py_DECREF(res);
3878         return treebuilder_done((TreeBuilderObject*) self->target);
3879     }
3880     else if (self->handle_close) {
3881         Py_DECREF(res);
3882         return PyObject_CallNoArgs(self->handle_close);
3883     }
3884     else {
3885         return res;
3886     }
3887 }
3888 
3889 /*[clinic input]
3890 _elementtree.XMLParser.feed
3891 
3892     data: object
3893     /
3894 
3895 [clinic start generated code]*/
3896 
3897 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3898 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3899 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3900 {
3901     /* feed data to parser */
3902 
3903     if (!_check_xmlparser(self)) {
3904         return NULL;
3905     }
3906     if (PyUnicode_Check(data)) {
3907         Py_ssize_t data_len;
3908         const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3909         if (data_ptr == NULL)
3910             return NULL;
3911         if (data_len > INT_MAX) {
3912             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3913             return NULL;
3914         }
3915         /* Explicitly set UTF-8 encoding. Return code ignored. */
3916         (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3917         return expat_parse(self, data_ptr, (int)data_len, 0);
3918     }
3919     else {
3920         Py_buffer view;
3921         PyObject *res;
3922         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3923             return NULL;
3924         if (view.len > INT_MAX) {
3925             PyBuffer_Release(&view);
3926             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3927             return NULL;
3928         }
3929         res = expat_parse(self, view.buf, (int)view.len, 0);
3930         PyBuffer_Release(&view);
3931         return res;
3932     }
3933 }
3934 
3935 /*[clinic input]
3936 _elementtree.XMLParser._parse_whole
3937 
3938     file: object
3939     /
3940 
3941 [clinic start generated code]*/
3942 
3943 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3944 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3945 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3946 {
3947     /* (internal) parse the whole input, until end of stream */
3948     PyObject* reader;
3949     PyObject* buffer;
3950     PyObject* temp;
3951     PyObject* res;
3952 
3953     if (!_check_xmlparser(self)) {
3954         return NULL;
3955     }
3956     reader = PyObject_GetAttrString(file, "read");
3957     if (!reader)
3958         return NULL;
3959 
3960     /* read from open file object */
3961     for (;;) {
3962 
3963         buffer = PyObject_CallFunction(reader, "i", 64*1024);
3964 
3965         if (!buffer) {
3966             /* read failed (e.g. due to KeyboardInterrupt) */
3967             Py_DECREF(reader);
3968             return NULL;
3969         }
3970 
3971         if (PyUnicode_CheckExact(buffer)) {
3972             /* A unicode object is encoded into bytes using UTF-8 */
3973             if (PyUnicode_GET_LENGTH(buffer) == 0) {
3974                 Py_DECREF(buffer);
3975                 break;
3976             }
3977             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3978             Py_DECREF(buffer);
3979             if (!temp) {
3980                 /* Propagate exception from PyUnicode_AsEncodedString */
3981                 Py_DECREF(reader);
3982                 return NULL;
3983             }
3984             buffer = temp;
3985         }
3986         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3987             Py_DECREF(buffer);
3988             break;
3989         }
3990 
3991         if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3992             Py_DECREF(buffer);
3993             Py_DECREF(reader);
3994             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3995             return NULL;
3996         }
3997         res = expat_parse(
3998             self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3999             );
4000 
4001         Py_DECREF(buffer);
4002 
4003         if (!res) {
4004             Py_DECREF(reader);
4005             return NULL;
4006         }
4007         Py_DECREF(res);
4008 
4009     }
4010 
4011     Py_DECREF(reader);
4012 
4013     res = expat_parse(self, "", 0, 1);
4014 
4015     if (res && TreeBuilder_CheckExact(self->target)) {
4016         Py_DECREF(res);
4017         return treebuilder_done((TreeBuilderObject*) self->target);
4018     }
4019 
4020     return res;
4021 }
4022 
4023 /*[clinic input]
4024 _elementtree.XMLParser._setevents
4025 
4026     events_queue: object
4027     events_to_report: object = None
4028     /
4029 
4030 [clinic start generated code]*/
4031 
4032 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4033 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4034                                        PyObject *events_queue,
4035                                        PyObject *events_to_report)
4036 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4037 {
4038     /* activate element event reporting */
4039     Py_ssize_t i;
4040     TreeBuilderObject *target;
4041     PyObject *events_append, *events_seq;
4042 
4043     if (!_check_xmlparser(self)) {
4044         return NULL;
4045     }
4046     if (!TreeBuilder_CheckExact(self->target)) {
4047         PyErr_SetString(
4048             PyExc_TypeError,
4049             "event handling only supported for ElementTree.TreeBuilder "
4050             "targets"
4051             );
4052         return NULL;
4053     }
4054 
4055     target = (TreeBuilderObject*) self->target;
4056 
4057     events_append = PyObject_GetAttrString(events_queue, "append");
4058     if (events_append == NULL)
4059         return NULL;
4060     Py_XSETREF(target->events_append, events_append);
4061 
4062     /* clear out existing events */
4063     Py_CLEAR(target->start_event_obj);
4064     Py_CLEAR(target->end_event_obj);
4065     Py_CLEAR(target->start_ns_event_obj);
4066     Py_CLEAR(target->end_ns_event_obj);
4067     Py_CLEAR(target->comment_event_obj);
4068     Py_CLEAR(target->pi_event_obj);
4069 
4070     if (events_to_report == Py_None) {
4071         /* default is "end" only */
4072         target->end_event_obj = PyUnicode_FromString("end");
4073         Py_RETURN_NONE;
4074     }
4075 
4076     if (!(events_seq = PySequence_Fast(events_to_report,
4077                                        "events must be a sequence"))) {
4078         return NULL;
4079     }
4080 
4081     for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4082         PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4083         const char *event_name = NULL;
4084         if (PyUnicode_Check(event_name_obj)) {
4085             event_name = PyUnicode_AsUTF8(event_name_obj);
4086         } else if (PyBytes_Check(event_name_obj)) {
4087             event_name = PyBytes_AS_STRING(event_name_obj);
4088         }
4089         if (event_name == NULL) {
4090             Py_DECREF(events_seq);
4091             PyErr_Format(PyExc_ValueError, "invalid events sequence");
4092             return NULL;
4093         }
4094 
4095         Py_INCREF(event_name_obj);
4096         if (strcmp(event_name, "start") == 0) {
4097             Py_XSETREF(target->start_event_obj, event_name_obj);
4098         } else if (strcmp(event_name, "end") == 0) {
4099             Py_XSETREF(target->end_event_obj, event_name_obj);
4100         } else if (strcmp(event_name, "start-ns") == 0) {
4101             Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4102             EXPAT(SetNamespaceDeclHandler)(
4103                 self->parser,
4104                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4105                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4106                 );
4107         } else if (strcmp(event_name, "end-ns") == 0) {
4108             Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4109             EXPAT(SetNamespaceDeclHandler)(
4110                 self->parser,
4111                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4112                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4113                 );
4114         } else if (strcmp(event_name, "comment") == 0) {
4115             Py_XSETREF(target->comment_event_obj, event_name_obj);
4116             EXPAT(SetCommentHandler)(
4117                 self->parser,
4118                 (XML_CommentHandler) expat_comment_handler
4119                 );
4120         } else if (strcmp(event_name, "pi") == 0) {
4121             Py_XSETREF(target->pi_event_obj, event_name_obj);
4122             EXPAT(SetProcessingInstructionHandler)(
4123                 self->parser,
4124                 (XML_ProcessingInstructionHandler) expat_pi_handler
4125                 );
4126         } else {
4127             Py_DECREF(event_name_obj);
4128             Py_DECREF(events_seq);
4129             PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4130             return NULL;
4131         }
4132     }
4133 
4134     Py_DECREF(events_seq);
4135     Py_RETURN_NONE;
4136 }
4137 
4138 static PyMemberDef xmlparser_members[] = {
4139     {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4140     {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4141     {NULL}
4142 };
4143 
4144 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4145 xmlparser_version_getter(XMLParserObject *self, void *closure)
4146 {
4147     return PyUnicode_FromFormat(
4148         "Expat %d.%d.%d", XML_MAJOR_VERSION,
4149         XML_MINOR_VERSION, XML_MICRO_VERSION);
4150 }
4151 
4152 static PyGetSetDef xmlparser_getsetlist[] = {
4153     {"version", (getter)xmlparser_version_getter, NULL, NULL},
4154     {NULL},
4155 };
4156 
4157 #include "clinic/_elementtree.c.h"
4158 
4159 static PyMethodDef element_methods[] = {
4160 
4161     _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4162 
4163     _ELEMENTTREE_ELEMENT_GET_METHODDEF
4164     _ELEMENTTREE_ELEMENT_SET_METHODDEF
4165 
4166     _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4167     _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4168     _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4169 
4170     _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4171     _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4172     _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4173     _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4174 
4175     _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4176     _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4177     _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4178 
4179     _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4180     _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4181 
4182     _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4183 
4184     _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4185     _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4186     _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4187     _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4188     _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4189 
4190     {NULL, NULL}
4191 };
4192 
4193 static PyMappingMethods element_as_mapping = {
4194     (lenfunc) element_length,
4195     (binaryfunc) element_subscr,
4196     (objobjargproc) element_ass_subscr,
4197 };
4198 
4199 static PyGetSetDef element_getsetlist[] = {
4200     {"tag",
4201         (getter)element_tag_getter,
4202         (setter)element_tag_setter,
4203         "A string identifying what kind of data this element represents"},
4204     {"text",
4205         (getter)element_text_getter,
4206         (setter)element_text_setter,
4207         "A string of text directly after the start tag, or None"},
4208     {"tail",
4209         (getter)element_tail_getter,
4210         (setter)element_tail_setter,
4211         "A string of text directly after the end tag, or None"},
4212     {"attrib",
4213         (getter)element_attrib_getter,
4214         (setter)element_attrib_setter,
4215         "A dictionary containing the element's attributes"},
4216     {NULL},
4217 };
4218 
4219 static PyTypeObject Element_Type = {
4220     PyVarObject_HEAD_INIT(NULL, 0)
4221     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4222     /* methods */
4223     (destructor)element_dealloc,                    /* tp_dealloc */
4224     0,                                              /* tp_vectorcall_offset */
4225     0,                                              /* tp_getattr */
4226     0,                                              /* tp_setattr */
4227     0,                                              /* tp_as_async */
4228     (reprfunc)element_repr,                         /* tp_repr */
4229     0,                                              /* tp_as_number */
4230     &element_as_sequence,                           /* tp_as_sequence */
4231     &element_as_mapping,                            /* tp_as_mapping */
4232     0,                                              /* tp_hash */
4233     0,                                              /* tp_call */
4234     0,                                              /* tp_str */
4235     PyObject_GenericGetAttr,                        /* tp_getattro */
4236     0,                                              /* tp_setattro */
4237     0,                                              /* tp_as_buffer */
4238     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4239                                                     /* tp_flags */
4240     0,                                              /* tp_doc */
4241     (traverseproc)element_gc_traverse,              /* tp_traverse */
4242     (inquiry)element_gc_clear,                      /* tp_clear */
4243     0,                                              /* tp_richcompare */
4244     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
4245     0,                                              /* tp_iter */
4246     0,                                              /* tp_iternext */
4247     element_methods,                                /* tp_methods */
4248     0,                                              /* tp_members */
4249     element_getsetlist,                             /* tp_getset */
4250     0,                                              /* tp_base */
4251     0,                                              /* tp_dict */
4252     0,                                              /* tp_descr_get */
4253     0,                                              /* tp_descr_set */
4254     0,                                              /* tp_dictoffset */
4255     (initproc)element_init,                         /* tp_init */
4256     PyType_GenericAlloc,                            /* tp_alloc */
4257     element_new,                                    /* tp_new */
4258     0,                                              /* tp_free */
4259 };
4260 
4261 static PyMethodDef treebuilder_methods[] = {
4262     _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4263     _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4264     _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4265     _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4266     _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4267     _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4268     {NULL, NULL}
4269 };
4270 
4271 static PyTypeObject TreeBuilder_Type = {
4272     PyVarObject_HEAD_INIT(NULL, 0)
4273     "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4274     /* methods */
4275     (destructor)treebuilder_dealloc,                /* tp_dealloc */
4276     0,                                              /* tp_vectorcall_offset */
4277     0,                                              /* tp_getattr */
4278     0,                                              /* tp_setattr */
4279     0,                                              /* tp_as_async */
4280     0,                                              /* tp_repr */
4281     0,                                              /* tp_as_number */
4282     0,                                              /* tp_as_sequence */
4283     0,                                              /* tp_as_mapping */
4284     0,                                              /* tp_hash */
4285     0,                                              /* tp_call */
4286     0,                                              /* tp_str */
4287     0,                                              /* tp_getattro */
4288     0,                                              /* tp_setattro */
4289     0,                                              /* tp_as_buffer */
4290     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4291                                                     /* tp_flags */
4292     0,                                              /* tp_doc */
4293     (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
4294     (inquiry)treebuilder_gc_clear,                  /* tp_clear */
4295     0,                                              /* tp_richcompare */
4296     0,                                              /* tp_weaklistoffset */
4297     0,                                              /* tp_iter */
4298     0,                                              /* tp_iternext */
4299     treebuilder_methods,                            /* tp_methods */
4300     0,                                              /* tp_members */
4301     0,                                              /* tp_getset */
4302     0,                                              /* tp_base */
4303     0,                                              /* tp_dict */
4304     0,                                              /* tp_descr_get */
4305     0,                                              /* tp_descr_set */
4306     0,                                              /* tp_dictoffset */
4307     _elementtree_TreeBuilder___init__,              /* tp_init */
4308     PyType_GenericAlloc,                            /* tp_alloc */
4309     treebuilder_new,                                /* tp_new */
4310     0,                                              /* tp_free */
4311 };
4312 
4313 static PyMethodDef xmlparser_methods[] = {
4314     _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4315     _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4316     _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4317     _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4318     {NULL, NULL}
4319 };
4320 
4321 static PyTypeObject XMLParser_Type = {
4322     PyVarObject_HEAD_INIT(NULL, 0)
4323     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4324     /* methods */
4325     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4326     0,                                              /* tp_vectorcall_offset */
4327     0,                                              /* tp_getattr */
4328     0,                                              /* tp_setattr */
4329     0,                                              /* tp_as_async */
4330     0,                                              /* tp_repr */
4331     0,                                              /* tp_as_number */
4332     0,                                              /* tp_as_sequence */
4333     0,                                              /* tp_as_mapping */
4334     0,                                              /* tp_hash */
4335     0,                                              /* tp_call */
4336     0,                                              /* tp_str */
4337     0,                                              /* tp_getattro */
4338     0,                                              /* tp_setattro */
4339     0,                                              /* tp_as_buffer */
4340     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4341                                                     /* tp_flags */
4342     0,                                              /* tp_doc */
4343     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4344     (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4345     0,                                              /* tp_richcompare */
4346     0,                                              /* tp_weaklistoffset */
4347     0,                                              /* tp_iter */
4348     0,                                              /* tp_iternext */
4349     xmlparser_methods,                              /* tp_methods */
4350     xmlparser_members,                              /* tp_members */
4351     xmlparser_getsetlist,                           /* tp_getset */
4352     0,                                              /* tp_base */
4353     0,                                              /* tp_dict */
4354     0,                                              /* tp_descr_get */
4355     0,                                              /* tp_descr_set */
4356     0,                                              /* tp_dictoffset */
4357     _elementtree_XMLParser___init__,                /* tp_init */
4358     PyType_GenericAlloc,                            /* tp_alloc */
4359     xmlparser_new,                                  /* tp_new */
4360     0,                                              /* tp_free */
4361 };
4362 
4363 /* ==================================================================== */
4364 /* python module interface */
4365 
4366 static PyMethodDef _functions[] = {
4367     {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
4368     _ELEMENTTREE__SET_FACTORIES_METHODDEF
4369     {NULL, NULL}
4370 };
4371 
4372 
4373 static struct PyModuleDef elementtreemodule = {
4374     PyModuleDef_HEAD_INIT,
4375     "_elementtree",
4376     NULL,
4377     sizeof(elementtreestate),
4378     _functions,
4379     NULL,
4380     elementtree_traverse,
4381     elementtree_clear,
4382     elementtree_free
4383 };
4384 
4385 PyMODINIT_FUNC
PyInit__elementtree(void)4386 PyInit__elementtree(void)
4387 {
4388     PyObject *m, *temp;
4389     elementtreestate *st;
4390 
4391     m = PyState_FindModule(&elementtreemodule);
4392     if (m) {
4393         Py_INCREF(m);
4394         return m;
4395     }
4396 
4397     /* Initialize object types */
4398     if (PyType_Ready(&ElementIter_Type) < 0)
4399         return NULL;
4400     if (PyType_Ready(&TreeBuilder_Type) < 0)
4401         return NULL;
4402     if (PyType_Ready(&Element_Type) < 0)
4403         return NULL;
4404     if (PyType_Ready(&XMLParser_Type) < 0)
4405         return NULL;
4406 
4407     m = PyModule_Create(&elementtreemodule);
4408     if (!m)
4409         return NULL;
4410     st = get_elementtree_state(m);
4411 
4412     if (!(temp = PyImport_ImportModule("copy")))
4413         return NULL;
4414     st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4415     Py_XDECREF(temp);
4416 
4417     if (st->deepcopy_obj == NULL) {
4418         return NULL;
4419     }
4420 
4421     assert(!PyErr_Occurred());
4422     if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4423         return NULL;
4424 
4425     /* link against pyexpat */
4426     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4427     if (expat_capi) {
4428         /* check that it's usable */
4429         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4430             (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4431             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4432             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4433             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4434             PyErr_SetString(PyExc_ImportError,
4435                             "pyexpat version is incompatible");
4436             return NULL;
4437         }
4438     } else {
4439         return NULL;
4440     }
4441 
4442     st->parseerror_obj = PyErr_NewException(
4443         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4444         );
4445     Py_INCREF(st->parseerror_obj);
4446     if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4447         Py_DECREF(st->parseerror_obj);
4448         return NULL;
4449     }
4450 
4451     PyTypeObject *types[] = {
4452         &Element_Type,
4453         &TreeBuilder_Type,
4454         &XMLParser_Type
4455     };
4456 
4457     for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4458         if (PyModule_AddType(m, types[i]) < 0) {
4459             return NULL;
4460         }
4461     }
4462 
4463     return m;
4464 }
4465