1 /*--------------------------------------------------------------------
2  * Licensed to PSF under a Contributor Agreement.
3  * See http://www.python.org/psf/license for licensing details.
4  *
5  * _elementtree - C accelerator for xml.etree.ElementTree
6  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7  * Copyright (c) 1999-2009 by Fredrik Lundh.
8  *
9  * info@pythonware.com
10  * http://www.pythonware.com
11  *--------------------------------------------------------------------
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 
16 #include "Python.h"
17 #include "structmember.h"
18 
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21 
/* Number of child slots stored inline in an element's extra block: an
   element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35 
36 /* -------------------------------------------------------------------- */
37 
#if 0
/* Debug-only allocation tracing: keep a running byte count and print it
   on every ALLOC/RELEASE.  Compiled out by default. */
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
/* Tracing disabled: both macros expand to nothing. */
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48 
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   'type'; MSVC builds additionally ask for inlining and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55 
/* Macros that keep a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)        the pointer with the flag bit stripped
   JOIN_GET(p)        the flag bit of p (0 or 1)
   JOIN_SET(p, flag)  the object of p with 'flag' or'ed into the low bit */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
63 
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68     PyObject *tmp = JOIN_OBJ(*p);
69     *p = new_joined_ptr;
70     Py_DECREF(tmp);
71 }
72 
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74  * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78     if (*p) {
79         _set_joined_ptr(p, NULL);
80     }
81 }
82 
83 /* Types defined by this extension */
84 static PyTypeObject Element_Type;
85 static PyTypeObject ElementIter_Type;
86 static PyTypeObject TreeBuilder_Type;
87 static PyTypeObject XMLParser_Type;
88 
89 
90 /* Per-module state; PEP 3121 */
91 typedef struct {
92     PyObject *parseerror_obj;
93     PyObject *deepcopy_obj;
94     PyObject *elementpath_obj;
95 } elementtreestate;
96 
97 static struct PyModuleDef elementtreemodule;
98 
/* Per-module state of the given module object, which is assumed to be
 * _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Per-module state of the module instance imported in the currently
 * running sub-interpreter, looked up through PyState_FindModule.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109 
110 static int
elementtree_clear(PyObject * m)111 elementtree_clear(PyObject *m)
112 {
113     elementtreestate *st = ET_STATE(m);
114     Py_CLEAR(st->parseerror_obj);
115     Py_CLEAR(st->deepcopy_obj);
116     Py_CLEAR(st->elementpath_obj);
117     return 0;
118 }
119 
120 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)121 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122 {
123     elementtreestate *st = ET_STATE(m);
124     Py_VISIT(st->parseerror_obj);
125     Py_VISIT(st->deepcopy_obj);
126     Py_VISIT(st->elementpath_obj);
127     return 0;
128 }
129 
130 static void
elementtree_free(void * m)131 elementtree_free(void *m)
132 {
133     elementtree_clear((PyObject *)m);
134 }
135 
136 /* helpers */
137 
138 LOCAL(PyObject*)
list_join(PyObject * list)139 list_join(PyObject* list)
140 {
141     /* join list elements */
142     PyObject* joiner;
143     PyObject* result;
144 
145     joiner = PyUnicode_FromStringAndSize("", 0);
146     if (!joiner)
147         return NULL;
148     result = PyUnicode_Join(joiner, list);
149     Py_DECREF(joiner);
150     return result;
151 }
152 
153 /* Is the given object an empty dictionary?
154 */
155 static int
is_empty_dict(PyObject * obj)156 is_empty_dict(PyObject *obj)
157 {
158     return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
159 }
160 
161 
162 /* -------------------------------------------------------------------- */
163 /* the Element type */
164 
165 typedef struct {
166 
167     /* attributes (a dictionary object), or None if no attributes */
168     PyObject* attrib;
169 
170     /* child elements */
171     Py_ssize_t length; /* actual number of items */
172     Py_ssize_t allocated; /* allocated items */
173 
174     /* this either points to _children or to a malloced buffer */
175     PyObject* *children;
176 
177     PyObject* _children[STATIC_CHILDREN];
178 
179 } ElementObjectExtra;
180 
181 typedef struct {
182     PyObject_HEAD
183 
184     /* element tag (a string). */
185     PyObject* tag;
186 
187     /* text before first child.  note that this is a tagged pointer;
188        use JOIN_OBJ to get the object pointer.  the join flag is used
189        to distinguish lists created by the tree builder from lists
190        assigned to the attribute by application code; the former
191        should be joined before being returned to the user, the latter
192        should be left intact. */
193     PyObject* text;
194 
195     /* text after this element, in parent.  note that this is a tagged
196        pointer; use JOIN_OBJ to get the object pointer. */
197     PyObject* tail;
198 
199     ElementObjectExtra* extra;
200 
201     PyObject *weakreflist; /* For tp_weaklistoffset */
202 
203 } ElementObject;
204 
205 
/* Element_CheckExact: true only for instances whose type is exactly
   Element_Type.  Element_Check: also true for instances of subtypes. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208 
209 
210 /* -------------------------------------------------------------------- */
211 /* Element constructors and destructor */
212 
213 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)214 create_extra(ElementObject* self, PyObject* attrib)
215 {
216     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
217     if (!self->extra) {
218         PyErr_NoMemory();
219         return -1;
220     }
221 
222     if (!attrib)
223         attrib = Py_None;
224 
225     Py_INCREF(attrib);
226     self->extra->attrib = attrib;
227 
228     self->extra->length = 0;
229     self->extra->allocated = STATIC_CHILDREN;
230     self->extra->children = self->extra->_children;
231 
232     return 0;
233 }
234 
235 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)236 dealloc_extra(ElementObjectExtra *extra)
237 {
238     Py_ssize_t i;
239 
240     if (!extra)
241         return;
242 
243     Py_DECREF(extra->attrib);
244 
245     for (i = 0; i < extra->length; i++)
246         Py_DECREF(extra->children[i]);
247 
248     if (extra->children != extra->_children)
249         PyObject_Free(extra->children);
250 
251     PyObject_Free(extra);
252 }
253 
254 LOCAL(void)
clear_extra(ElementObject * self)255 clear_extra(ElementObject* self)
256 {
257     ElementObjectExtra *myextra;
258 
259     if (!self->extra)
260         return;
261 
262     /* Avoid DECREFs calling into this code again (cycles, etc.)
263     */
264     myextra = self->extra;
265     self->extra = NULL;
266 
267     dealloc_extra(myextra);
268 }
269 
270 /* Convenience internal function to create new Element objects with the given
271  * tag and attributes.
272 */
273 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)274 create_new_element(PyObject* tag, PyObject* attrib)
275 {
276     ElementObject* self;
277 
278     self = PyObject_GC_New(ElementObject, &Element_Type);
279     if (self == NULL)
280         return NULL;
281     self->extra = NULL;
282 
283     Py_INCREF(tag);
284     self->tag = tag;
285 
286     Py_INCREF(Py_None);
287     self->text = Py_None;
288 
289     Py_INCREF(Py_None);
290     self->tail = Py_None;
291 
292     self->weakreflist = NULL;
293 
294     ALLOC(sizeof(ElementObject), "create element");
295     PyObject_GC_Track(self);
296 
297     if (attrib != Py_None && !is_empty_dict(attrib)) {
298         if (create_extra(self, attrib) < 0) {
299             Py_DECREF(self);
300             return NULL;
301         }
302     }
303 
304     return (PyObject*) self;
305 }
306 
307 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)308 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309 {
310     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311     if (e != NULL) {
312         Py_INCREF(Py_None);
313         e->tag = Py_None;
314 
315         Py_INCREF(Py_None);
316         e->text = Py_None;
317 
318         Py_INCREF(Py_None);
319         e->tail = Py_None;
320 
321         e->extra = NULL;
322         e->weakreflist = NULL;
323     }
324     return (PyObject *)e;
325 }
326 
327 /* Helper function for extracting the attrib dictionary from a keywords dict.
328  * This is required by some constructors/functions in this module that can
329  * either accept attrib as a keyword argument or all attributes splashed
330  * directly into *kwds.
331  *
332  * Return a dictionary with the content of kwds merged into the content of
333  * attrib. If there is no attrib keyword, return a copy of kwds.
334  */
335 static PyObject*
get_attrib_from_keywords(PyObject * kwds)336 get_attrib_from_keywords(PyObject *kwds)
337 {
338     PyObject *attrib_str = PyUnicode_FromString("attrib");
339     if (attrib_str == NULL) {
340         return NULL;
341     }
342     PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
343 
344     if (attrib) {
345         /* If attrib was found in kwds, copy its value and remove it from
346          * kwds
347          */
348         if (!PyDict_Check(attrib)) {
349             Py_DECREF(attrib_str);
350             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351                          Py_TYPE(attrib)->tp_name);
352             return NULL;
353         }
354         attrib = PyDict_Copy(attrib);
355         if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356             Py_DECREF(attrib);
357             attrib = NULL;
358         }
359     } else {
360         attrib = PyDict_New();
361     }
362 
363     Py_DECREF(attrib_str);
364 
365     if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366         Py_DECREF(attrib);
367         return NULL;
368     }
369     return attrib;
370 }
371 
372 /*[clinic input]
373 module _elementtree
374 class _elementtree.Element "ElementObject *" "&Element_Type"
375 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377 [clinic start generated code]*/
378 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379 
380 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)381 element_init(PyObject *self, PyObject *args, PyObject *kwds)
382 {
383     PyObject *tag;
384     PyObject *attrib = NULL;
385     ElementObject *self_elem;
386 
387     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388         return -1;
389 
390     if (attrib) {
391         /* attrib passed as positional arg */
392         attrib = PyDict_Copy(attrib);
393         if (!attrib)
394             return -1;
395         if (kwds) {
396             if (PyDict_Update(attrib, kwds) < 0) {
397                 Py_DECREF(attrib);
398                 return -1;
399             }
400         }
401     } else if (kwds) {
402         /* have keywords args */
403         attrib = get_attrib_from_keywords(kwds);
404         if (!attrib)
405             return -1;
406     }
407 
408     self_elem = (ElementObject *)self;
409 
410     if (attrib != NULL && !is_empty_dict(attrib)) {
411         if (create_extra(self_elem, attrib) < 0) {
412             Py_DECREF(attrib);
413             return -1;
414         }
415     }
416 
417     /* We own a reference to attrib here and it's no longer needed. */
418     Py_XDECREF(attrib);
419 
420     /* Replace the objects already pointed to by tag, text and tail. */
421     Py_INCREF(tag);
422     Py_XSETREF(self_elem->tag, tag);
423 
424     Py_INCREF(Py_None);
425     _set_joined_ptr(&self_elem->text, Py_None);
426 
427     Py_INCREF(Py_None);
428     _set_joined_ptr(&self_elem->tail, Py_None);
429 
430     return 0;
431 }
432 
433 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)434 element_resize(ElementObject* self, Py_ssize_t extra)
435 {
436     Py_ssize_t size;
437     PyObject* *children;
438 
439     assert(extra >= 0);
440     /* make sure self->children can hold the given number of extra
441        elements.  set an exception and return -1 if allocation failed */
442 
443     if (!self->extra) {
444         if (create_extra(self, NULL) < 0)
445             return -1;
446     }
447 
448     size = self->extra->length + extra;  /* never overflows */
449 
450     if (size > self->extra->allocated) {
451         /* use Python 2.4's list growth strategy */
452         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
453         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
454          * which needs at least 4 bytes.
455          * Although it's a false alarm always assume at least one child to
456          * be safe.
457          */
458         size = size ? size : 1;
459         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460             goto nomemory;
461         if (self->extra->children != self->extra->_children) {
462             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
463              * "children", which needs at least 4 bytes. Although it's a
464              * false alarm always assume at least one child to be safe.
465              */
466             children = PyObject_Realloc(self->extra->children,
467                                         size * sizeof(PyObject*));
468             if (!children)
469                 goto nomemory;
470         } else {
471             children = PyObject_Malloc(size * sizeof(PyObject*));
472             if (!children)
473                 goto nomemory;
474             /* copy existing children from static area to malloc buffer */
475             memcpy(children, self->extra->children,
476                    self->extra->length * sizeof(PyObject*));
477         }
478         self->extra->children = children;
479         self->extra->allocated = size;
480     }
481 
482     return 0;
483 
484   nomemory:
485     PyErr_NoMemory();
486     return -1;
487 }
488 
489 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)490 element_add_subelement(ElementObject* self, PyObject* element)
491 {
492     /* add a child element to a parent */
493 
494     if (element_resize(self, 1) < 0)
495         return -1;
496 
497     Py_INCREF(element);
498     self->extra->children[self->extra->length] = element;
499 
500     self->extra->length++;
501 
502     return 0;
503 }
504 
505 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)506 element_get_attrib(ElementObject* self)
507 {
508     /* return borrowed reference to attrib dictionary */
509     /* note: this function assumes that the extra section exists */
510 
511     PyObject* res = self->extra->attrib;
512 
513     if (res == Py_None) {
514         /* create missing dictionary */
515         res = PyDict_New();
516         if (!res)
517             return NULL;
518         Py_DECREF(Py_None);
519         self->extra->attrib = res;
520     }
521 
522     return res;
523 }
524 
525 LOCAL(PyObject*)
element_get_text(ElementObject * self)526 element_get_text(ElementObject* self)
527 {
528     /* return borrowed reference to text attribute */
529 
530     PyObject *res = self->text;
531 
532     if (JOIN_GET(res)) {
533         res = JOIN_OBJ(res);
534         if (PyList_CheckExact(res)) {
535             PyObject *tmp = list_join(res);
536             if (!tmp)
537                 return NULL;
538             self->text = tmp;
539             Py_DECREF(res);
540             res = tmp;
541         }
542     }
543 
544     return res;
545 }
546 
547 LOCAL(PyObject*)
element_get_tail(ElementObject * self)548 element_get_tail(ElementObject* self)
549 {
550     /* return borrowed reference to text attribute */
551 
552     PyObject *res = self->tail;
553 
554     if (JOIN_GET(res)) {
555         res = JOIN_OBJ(res);
556         if (PyList_CheckExact(res)) {
557             PyObject *tmp = list_join(res);
558             if (!tmp)
559                 return NULL;
560             self->tail = tmp;
561             Py_DECREF(res);
562             res = tmp;
563         }
564     }
565 
566     return res;
567 }
568 
569 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)570 subelement(PyObject *self, PyObject *args, PyObject *kwds)
571 {
572     PyObject* elem;
573 
574     ElementObject* parent;
575     PyObject* tag;
576     PyObject* attrib = NULL;
577     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
578                           &Element_Type, &parent, &tag,
579                           &PyDict_Type, &attrib)) {
580         return NULL;
581     }
582 
583     if (attrib) {
584         /* attrib passed as positional arg */
585         attrib = PyDict_Copy(attrib);
586         if (!attrib)
587             return NULL;
588         if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
589             Py_DECREF(attrib);
590             return NULL;
591         }
592     } else if (kwds) {
593         /* have keyword args */
594         attrib = get_attrib_from_keywords(kwds);
595         if (!attrib)
596             return NULL;
597     } else {
598         /* no attrib arg, no kwds, so no attribute */
599         Py_INCREF(Py_None);
600         attrib = Py_None;
601     }
602 
603     elem = create_new_element(tag, attrib);
604     Py_DECREF(attrib);
605     if (elem == NULL)
606         return NULL;
607 
608     if (element_add_subelement(parent, elem) < 0) {
609         Py_DECREF(elem);
610         return NULL;
611     }
612 
613     return elem;
614 }
615 
616 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)617 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
618 {
619     Py_VISIT(self->tag);
620     Py_VISIT(JOIN_OBJ(self->text));
621     Py_VISIT(JOIN_OBJ(self->tail));
622 
623     if (self->extra) {
624         Py_ssize_t i;
625         Py_VISIT(self->extra->attrib);
626 
627         for (i = 0; i < self->extra->length; ++i)
628             Py_VISIT(self->extra->children[i]);
629     }
630     return 0;
631 }
632 
633 static int
element_gc_clear(ElementObject * self)634 element_gc_clear(ElementObject *self)
635 {
636     Py_CLEAR(self->tag);
637     _clear_joined_ptr(&self->text);
638     _clear_joined_ptr(&self->tail);
639 
640     /* After dropping all references from extra, it's no longer valid anyway,
641      * so fully deallocate it.
642     */
643     clear_extra(self);
644     return 0;
645 }
646 
647 static void
element_dealloc(ElementObject * self)648 element_dealloc(ElementObject* self)
649 {
650     /* bpo-31095: UnTrack is needed before calling any callbacks */
651     PyObject_GC_UnTrack(self);
652     Py_TRASHCAN_SAFE_BEGIN(self)
653 
654     if (self->weakreflist != NULL)
655         PyObject_ClearWeakRefs((PyObject *) self);
656 
657     /* element_gc_clear clears all references and deallocates extra
658     */
659     element_gc_clear(self);
660 
661     RELEASE(sizeof(ElementObject), "destroy element");
662     Py_TYPE(self)->tp_free((PyObject *)self);
663     Py_TRASHCAN_SAFE_END(self)
664 }
665 
666 /* -------------------------------------------------------------------- */
667 
668 /*[clinic input]
669 _elementtree.Element.append
670 
671     subelement: object(subclass_of='&Element_Type')
672     /
673 
674 [clinic start generated code]*/
675 
676 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)677 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
678 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
679 {
680     if (element_add_subelement(self, subelement) < 0)
681         return NULL;
682 
683     Py_RETURN_NONE;
684 }
685 
686 /*[clinic input]
687 _elementtree.Element.clear
688 
689 [clinic start generated code]*/
690 
691 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)692 _elementtree_Element_clear_impl(ElementObject *self)
693 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
694 {
695     clear_extra(self);
696 
697     Py_INCREF(Py_None);
698     _set_joined_ptr(&self->text, Py_None);
699 
700     Py_INCREF(Py_None);
701     _set_joined_ptr(&self->tail, Py_None);
702 
703     Py_RETURN_NONE;
704 }
705 
706 /*[clinic input]
707 _elementtree.Element.__copy__
708 
709 [clinic start generated code]*/
710 
711 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)712 _elementtree_Element___copy___impl(ElementObject *self)
713 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
714 {
715     Py_ssize_t i;
716     ElementObject* element;
717 
718     element = (ElementObject*) create_new_element(
719         self->tag, (self->extra) ? self->extra->attrib : Py_None);
720     if (!element)
721         return NULL;
722 
723     Py_INCREF(JOIN_OBJ(self->text));
724     _set_joined_ptr(&element->text, self->text);
725 
726     Py_INCREF(JOIN_OBJ(self->tail));
727     _set_joined_ptr(&element->tail, self->tail);
728 
729     assert(!element->extra || !element->extra->length);
730     if (self->extra) {
731         if (element_resize(element, self->extra->length) < 0) {
732             Py_DECREF(element);
733             return NULL;
734         }
735 
736         for (i = 0; i < self->extra->length; i++) {
737             Py_INCREF(self->extra->children[i]);
738             element->extra->children[i] = self->extra->children[i];
739         }
740 
741         assert(!element->extra->length);
742         element->extra->length = self->extra->length;
743     }
744 
745     return (PyObject*) element;
746 }
747 
748 /* Helper for a deep copy. */
749 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
750 
751 /*[clinic input]
752 _elementtree.Element.__deepcopy__
753 
754     memo: object(subclass_of="&PyDict_Type")
755     /
756 
757 [clinic start generated code]*/
758 
759 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)760 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
761 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
762 {
763     Py_ssize_t i;
764     ElementObject* element;
765     PyObject* tag;
766     PyObject* attrib;
767     PyObject* text;
768     PyObject* tail;
769     PyObject* id;
770 
771     tag = deepcopy(self->tag, memo);
772     if (!tag)
773         return NULL;
774 
775     if (self->extra) {
776         attrib = deepcopy(self->extra->attrib, memo);
777         if (!attrib) {
778             Py_DECREF(tag);
779             return NULL;
780         }
781     } else {
782         Py_INCREF(Py_None);
783         attrib = Py_None;
784     }
785 
786     element = (ElementObject*) create_new_element(tag, attrib);
787 
788     Py_DECREF(tag);
789     Py_DECREF(attrib);
790 
791     if (!element)
792         return NULL;
793 
794     text = deepcopy(JOIN_OBJ(self->text), memo);
795     if (!text)
796         goto error;
797     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
798 
799     tail = deepcopy(JOIN_OBJ(self->tail), memo);
800     if (!tail)
801         goto error;
802     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
803 
804     assert(!element->extra || !element->extra->length);
805     if (self->extra) {
806         if (element_resize(element, self->extra->length) < 0)
807             goto error;
808 
809         for (i = 0; i < self->extra->length; i++) {
810             PyObject* child = deepcopy(self->extra->children[i], memo);
811             if (!child) {
812                 element->extra->length = i;
813                 goto error;
814             }
815             element->extra->children[i] = child;
816         }
817 
818         assert(!element->extra->length);
819         element->extra->length = self->extra->length;
820     }
821 
822     /* add object to memo dictionary (so deepcopy won't visit it again) */
823     id = PyLong_FromSsize_t((uintptr_t) self);
824     if (!id)
825         goto error;
826 
827     i = PyDict_SetItem(memo, id, (PyObject*) element);
828 
829     Py_DECREF(id);
830 
831     if (i < 0)
832         goto error;
833 
834     return (PyObject*) element;
835 
836   error:
837     Py_DECREF(element);
838     return NULL;
839 }
840 
841 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)842 deepcopy(PyObject *object, PyObject *memo)
843 {
844     /* do a deep copy of the given object */
845     elementtreestate *st;
846     PyObject *stack[2];
847 
848     /* Fast paths */
849     if (object == Py_None || PyUnicode_CheckExact(object)) {
850         Py_INCREF(object);
851         return object;
852     }
853 
854     if (Py_REFCNT(object) == 1) {
855         if (PyDict_CheckExact(object)) {
856             PyObject *key, *value;
857             Py_ssize_t pos = 0;
858             int simple = 1;
859             while (PyDict_Next(object, &pos, &key, &value)) {
860                 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
861                     simple = 0;
862                     break;
863                 }
864             }
865             if (simple)
866                 return PyDict_Copy(object);
867             /* Fall through to general case */
868         }
869         else if (Element_CheckExact(object)) {
870             return _elementtree_Element___deepcopy___impl(
871                 (ElementObject *)object, memo);
872         }
873     }
874 
875     /* General case */
876     st = ET_STATE_GLOBAL;
877     if (!st->deepcopy_obj) {
878         PyErr_SetString(PyExc_RuntimeError,
879                         "deepcopy helper not found");
880         return NULL;
881     }
882 
883     stack[0] = object;
884     stack[1] = memo;
885     return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
886 }
887 
888 
889 /*[clinic input]
890 _elementtree.Element.__sizeof__ -> Py_ssize_t
891 
892 [clinic start generated code]*/
893 
894 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)895 _elementtree_Element___sizeof___impl(ElementObject *self)
896 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
897 {
898     Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
899     if (self->extra) {
900         result += sizeof(ElementObjectExtra);
901         if (self->extra->children != self->extra->_children)
902             result += sizeof(PyObject*) * self->extra->allocated;
903     }
904     return result;
905 }
906 
/* Keys of the state dictionary produced by __getstate__ and read back by
   __setstate__. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
913 
914 /* __getstate__ returns a fabricated instance dict as in the pure-Python
915  * Element implementation, for interoperability/interchangeability.  This
916  * makes the pure-Python implementation details an API, but (a) there aren't
917  * any unnecessary structures there; and (b) it buys compatibility with 3.2
918  * pickles.  See issue #16076.
919  */
920 /*[clinic input]
921 _elementtree.Element.__getstate__
922 
923 [clinic start generated code]*/
924 
925 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)926 _elementtree_Element___getstate___impl(ElementObject *self)
927 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
928 {
929     Py_ssize_t i;
930     PyObject *children, *attrib;
931 
932     /* Build a list of children. */
933     children = PyList_New(self->extra ? self->extra->length : 0);
934     if (!children)
935         return NULL;
936     for (i = 0; i < PyList_GET_SIZE(children); i++) {
937         PyObject *child = self->extra->children[i];
938         Py_INCREF(child);
939         PyList_SET_ITEM(children, i, child);
940     }
941 
942     if (self->extra && self->extra->attrib != Py_None) {
943         attrib = self->extra->attrib;
944         Py_INCREF(attrib);
945     }
946     else {
947         attrib = PyDict_New();
948         if (!attrib) {
949             Py_DECREF(children);
950             return NULL;
951         }
952     }
953 
954     return Py_BuildValue("{sOsNsNsOsO}",
955                          PICKLED_TAG, self->tag,
956                          PICKLED_CHILDREN, children,
957                          PICKLED_ATTRIB, attrib,
958                          PICKLED_TEXT, JOIN_OBJ(self->text),
959                          PICKLED_TAIL, JOIN_OBJ(self->tail));
960 }
961 
/* Restore the state of 'self' from individually supplied fields.
 *
 * 'tag' is mandatory; 'attrib', 'text', 'tail' and 'children' may each be
 * NULL.  A NULL 'text' or 'tail' is stored as None.  'children', when
 * given, must be a list; its items become the children of 'self'.
 *
 * Returns a new reference to None on success, or NULL with an exception
 * set on failure.  Note that tag/text/tail have already been replaced by
 * the time a later step (list check, resize) can fail.
 */
static PyObject *
element_setstate_from_attributes(ElementObject *self,
                                 PyObject *tag,
                                 PyObject *attrib,
                                 PyObject *text,
                                 PyObject *tail,
                                 PyObject *children)
{
    Py_ssize_t i, nchildren;
    ElementObjectExtra *oldextra = NULL;

    if (!tag) {
        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
        return NULL;
    }

    /* Take our own reference before dropping the previous tag. */
    Py_INCREF(tag);
    Py_XSETREF(self->tag, tag);

    /* text/tail pointers carry a 'join' flag; it is set here when the
       value is an exact list.  JOIN_OBJ recovers the plain object pointer
       for reference counting. */
    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
    Py_INCREF(JOIN_OBJ(text));
    _set_joined_ptr(&self->text, text);

    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
    Py_INCREF(JOIN_OBJ(tail));
    _set_joined_ptr(&self->tail, tail);

    /* Handle ATTRIB and CHILDREN. */
    if (!children && !attrib) {
        /* Nothing that needs 'extra'; any existing 'extra' is left as is. */
        Py_RETURN_NONE;
    }

    /* Compute 'nchildren'. */
    if (children) {
        if (!PyList_Check(children)) {
            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
            return NULL;
        }
        nchildren = PyList_GET_SIZE(children);

        /* (Re-)allocate 'extra'.
           Avoid DECREFs calling into this code again (cycles, etc.)
         */
        oldextra = self->extra;
        self->extra = NULL;
        if (element_resize(self, nchildren)) {
            /* Resize failed: discard whatever was partially built and put
               the previous 'extra' back so 'self' keeps its old children. */
            assert(!self->extra || !self->extra->length);
            clear_extra(self);
            self->extra = oldextra;
            return NULL;
        }
        assert(self->extra);
        assert(self->extra->allocated >= nchildren);
        if (oldextra) {
            /* Move the attrib reference from the old 'extra' into the new
               one by swapping pointers instead of INCREF/DECREF.
               NOTE(review): presumably the None reference owned by the
               fresh 'extra' is what 'oldextra' ends up releasing in
               dealloc_extra() -- confirm against create_extra(). */
            assert(self->extra->attrib == Py_None);
            self->extra->attrib = oldextra->attrib;
            oldextra->attrib = Py_None;
        }

        /* Copy children */
        for (i = 0; i < nchildren; i++) {
            self->extra->children[i] = PyList_GET_ITEM(children, i);
            Py_INCREF(self->extra->children[i]);
        }

        assert(!self->extra->length);
        self->extra->length = nchildren;
    }
    else {
        /* Only 'attrib' was supplied; a zero-sized resize is requested,
           and the code below relies on 'self->extra' being non-NULL
           afterwards. */
        if (element_resize(self, 0)) {
            return NULL;
        }
    }

    /* Stash attrib. */
    if (attrib) {
        Py_INCREF(attrib);
        Py_XSETREF(self->extra->attrib, attrib);
    }
    /* Release the previous 'extra' last, once 'self' is fully updated.
       'oldextra' is NULL when no children list was supplied. */
    dealloc_extra(oldextra);

    Py_RETURN_NONE;
}
1045 
1046 /* __setstate__ for Element instance from the Python implementation.
1047  * 'state' should be the instance dict.
1048  */
1049 
1050 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1051 element_setstate_from_Python(ElementObject *self, PyObject *state)
1052 {
1053     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1054                              PICKLED_TAIL, PICKLED_CHILDREN, 0};
1055     PyObject *args;
1056     PyObject *tag, *attrib, *text, *tail, *children;
1057     PyObject *retval;
1058 
1059     tag = attrib = text = tail = children = NULL;
1060     args = PyTuple_New(0);
1061     if (!args)
1062         return NULL;
1063 
1064     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1065                                     &attrib, &text, &tail, &children))
1066         retval = element_setstate_from_attributes(self, tag, attrib, text,
1067                                                   tail, children);
1068     else
1069         retval = NULL;
1070 
1071     Py_DECREF(args);
1072     return retval;
1073 }
1074 
1075 /*[clinic input]
1076 _elementtree.Element.__setstate__
1077 
1078     state: object
1079     /
1080 
1081 [clinic start generated code]*/
1082 
1083 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1084 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1085 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1086 {
1087     if (!PyDict_CheckExact(state)) {
1088         PyErr_Format(PyExc_TypeError,
1089                      "Don't know how to unpickle \"%.200R\" as an Element",
1090                      state);
1091         return NULL;
1092     }
1093     else
1094         return element_setstate_from_Python(self, state);
1095 }
1096 
1097 LOCAL(int)
checkpath(PyObject * tag)1098 checkpath(PyObject* tag)
1099 {
1100     Py_ssize_t i;
1101     int check = 1;
1102 
1103     /* check if a tag contains an xpath character */
1104 
1105 #define PATHCHAR(ch) \
1106     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1107 
1108     if (PyUnicode_Check(tag)) {
1109         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1110         void *data = PyUnicode_DATA(tag);
1111         unsigned int kind = PyUnicode_KIND(tag);
1112         for (i = 0; i < len; i++) {
1113             Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1114             if (ch == '{')
1115                 check = 0;
1116             else if (ch == '}')
1117                 check = 1;
1118             else if (check && PATHCHAR(ch))
1119                 return 1;
1120         }
1121         return 0;
1122     }
1123     if (PyBytes_Check(tag)) {
1124         char *p = PyBytes_AS_STRING(tag);
1125         for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
1126             if (p[i] == '{')
1127                 check = 0;
1128             else if (p[i] == '}')
1129                 check = 1;
1130             else if (check && PATHCHAR(p[i]))
1131                 return 1;
1132         }
1133         return 0;
1134     }
1135 
1136     return 1; /* unknown type; might be path expression */
1137 }
1138 
1139 /*[clinic input]
1140 _elementtree.Element.extend
1141 
1142     elements: object
1143     /
1144 
1145 [clinic start generated code]*/
1146 
1147 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1148 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1149 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1150 {
1151     PyObject* seq;
1152     Py_ssize_t i;
1153 
1154     seq = PySequence_Fast(elements, "");
1155     if (!seq) {
1156         PyErr_Format(
1157             PyExc_TypeError,
1158             "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1159             );
1160         return NULL;
1161     }
1162 
1163     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1164         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1165         Py_INCREF(element);
1166         if (!Element_Check(element)) {
1167             PyErr_Format(
1168                 PyExc_TypeError,
1169                 "expected an Element, not \"%.200s\"",
1170                 Py_TYPE(element)->tp_name);
1171             Py_DECREF(seq);
1172             Py_DECREF(element);
1173             return NULL;
1174         }
1175 
1176         if (element_add_subelement(self, element) < 0) {
1177             Py_DECREF(seq);
1178             Py_DECREF(element);
1179             return NULL;
1180         }
1181         Py_DECREF(element);
1182     }
1183 
1184     Py_DECREF(seq);
1185 
1186     Py_RETURN_NONE;
1187 }
1188 
1189 /*[clinic input]
1190 _elementtree.Element.find
1191 
1192     path: object
1193     namespaces: object = None
1194 
1195 [clinic start generated code]*/
1196 
1197 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1198 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1199                                PyObject *namespaces)
1200 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1201 {
1202     Py_ssize_t i;
1203     elementtreestate *st = ET_STATE_GLOBAL;
1204 
1205     if (checkpath(path) || namespaces != Py_None) {
1206         _Py_IDENTIFIER(find);
1207         return _PyObject_CallMethodIdObjArgs(
1208             st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1209             );
1210     }
1211 
1212     if (!self->extra)
1213         Py_RETURN_NONE;
1214 
1215     for (i = 0; i < self->extra->length; i++) {
1216         PyObject* item = self->extra->children[i];
1217         int rc;
1218         if (!Element_Check(item))
1219             continue;
1220         Py_INCREF(item);
1221         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1222         if (rc > 0)
1223             return item;
1224         Py_DECREF(item);
1225         if (rc < 0)
1226             return NULL;
1227     }
1228 
1229     Py_RETURN_NONE;
1230 }
1231 
1232 /*[clinic input]
1233 _elementtree.Element.findtext
1234 
1235     path: object
1236     default: object = None
1237     namespaces: object = None
1238 
1239 [clinic start generated code]*/
1240 
1241 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1242 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1243                                    PyObject *default_value,
1244                                    PyObject *namespaces)
1245 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1246 {
1247     Py_ssize_t i;
1248     _Py_IDENTIFIER(findtext);
1249     elementtreestate *st = ET_STATE_GLOBAL;
1250 
1251     if (checkpath(path) || namespaces != Py_None)
1252         return _PyObject_CallMethodIdObjArgs(
1253             st->elementpath_obj, &PyId_findtext,
1254             self, path, default_value, namespaces, NULL
1255             );
1256 
1257     if (!self->extra) {
1258         Py_INCREF(default_value);
1259         return default_value;
1260     }
1261 
1262     for (i = 0; i < self->extra->length; i++) {
1263         PyObject *item = self->extra->children[i];
1264         int rc;
1265         if (!Element_Check(item))
1266             continue;
1267         Py_INCREF(item);
1268         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1269         if (rc > 0) {
1270             PyObject* text = element_get_text((ElementObject*)item);
1271             if (text == Py_None) {
1272                 Py_DECREF(item);
1273                 return PyUnicode_New(0, 0);
1274             }
1275             Py_XINCREF(text);
1276             Py_DECREF(item);
1277             return text;
1278         }
1279         Py_DECREF(item);
1280         if (rc < 0)
1281             return NULL;
1282     }
1283 
1284     Py_INCREF(default_value);
1285     return default_value;
1286 }
1287 
1288 /*[clinic input]
1289 _elementtree.Element.findall
1290 
1291     path: object
1292     namespaces: object = None
1293 
1294 [clinic start generated code]*/
1295 
1296 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1297 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1298                                   PyObject *namespaces)
1299 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1300 {
1301     Py_ssize_t i;
1302     PyObject* out;
1303     elementtreestate *st = ET_STATE_GLOBAL;
1304 
1305     if (checkpath(path) || namespaces != Py_None) {
1306         _Py_IDENTIFIER(findall);
1307         return _PyObject_CallMethodIdObjArgs(
1308             st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1309             );
1310     }
1311 
1312     out = PyList_New(0);
1313     if (!out)
1314         return NULL;
1315 
1316     if (!self->extra)
1317         return out;
1318 
1319     for (i = 0; i < self->extra->length; i++) {
1320         PyObject* item = self->extra->children[i];
1321         int rc;
1322         if (!Element_Check(item))
1323             continue;
1324         Py_INCREF(item);
1325         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1326         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1327             Py_DECREF(item);
1328             Py_DECREF(out);
1329             return NULL;
1330         }
1331         Py_DECREF(item);
1332     }
1333 
1334     return out;
1335 }
1336 
1337 /*[clinic input]
1338 _elementtree.Element.iterfind
1339 
1340     path: object
1341     namespaces: object = None
1342 
1343 [clinic start generated code]*/
1344 
1345 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1346 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1347                                    PyObject *namespaces)
1348 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1349 {
1350     PyObject* tag = path;
1351     _Py_IDENTIFIER(iterfind);
1352     elementtreestate *st = ET_STATE_GLOBAL;
1353 
1354     return _PyObject_CallMethodIdObjArgs(
1355         st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1356 }
1357 
1358 /*[clinic input]
1359 _elementtree.Element.get
1360 
1361     key: object
1362     default: object = None
1363 
1364 [clinic start generated code]*/
1365 
1366 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1367 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1368                               PyObject *default_value)
1369 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1370 {
1371     PyObject* value;
1372 
1373     if (!self->extra || self->extra->attrib == Py_None)
1374         value = default_value;
1375     else {
1376         value = PyDict_GetItem(self->extra->attrib, key);
1377         if (!value)
1378             value = default_value;
1379     }
1380 
1381     Py_INCREF(value);
1382     return value;
1383 }
1384 
1385 /*[clinic input]
1386 _elementtree.Element.getchildren
1387 
1388 [clinic start generated code]*/
1389 
1390 static PyObject *
_elementtree_Element_getchildren_impl(ElementObject * self)1391 _elementtree_Element_getchildren_impl(ElementObject *self)
1392 /*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
1393 {
1394     Py_ssize_t i;
1395     PyObject* list;
1396 
1397     if (PyErr_WarnEx(PyExc_DeprecationWarning,
1398                      "This method will be removed in future versions.  "
1399                      "Use 'list(elem)' or iteration over elem instead.",
1400                      1) < 0) {
1401         return NULL;
1402     }
1403 
1404     if (!self->extra)
1405         return PyList_New(0);
1406 
1407     list = PyList_New(self->extra->length);
1408     if (!list)
1409         return NULL;
1410 
1411     for (i = 0; i < self->extra->length; i++) {
1412         PyObject* item = self->extra->children[i];
1413         Py_INCREF(item);
1414         PyList_SET_ITEM(list, i, item);
1415     }
1416 
1417     return list;
1418 }
1419 
1420 
/* Forward declaration; the definition is not in this part of the file.
   Callers here pass gettext=0 from Element.iter() and gettext=1 from
   Element.itertext(), and use None as 'tag' for "no tag filter". */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1423 
1424 
1425 /*[clinic input]
1426 _elementtree.Element.iter
1427 
1428     tag: object = None
1429 
1430 [clinic start generated code]*/
1431 
1432 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1433 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1434 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1435 {
1436     if (PyUnicode_Check(tag)) {
1437         if (PyUnicode_READY(tag) < 0)
1438             return NULL;
1439         if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1440             tag = Py_None;
1441     }
1442     else if (PyBytes_Check(tag)) {
1443         if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1444             tag = Py_None;
1445     }
1446 
1447     return create_elementiter(self, tag, 0);
1448 }
1449 
1450 
1451 /*[clinic input]
1452 _elementtree.Element.getiterator
1453 
1454     tag: object = None
1455 
1456 [clinic start generated code]*/
1457 
1458 static PyObject *
_elementtree_Element_getiterator_impl(ElementObject * self,PyObject * tag)1459 _elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1460 /*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1461 {
1462     /* Change for a DeprecationWarning in 1.4 */
1463     if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1464                      "This method will be removed in future versions.  "
1465                      "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1466                      1) < 0) {
1467         return NULL;
1468     }
1469     return _elementtree_Element_iter_impl(self, tag);
1470 }
1471 
1472 
1473 /*[clinic input]
1474 _elementtree.Element.itertext
1475 
1476 [clinic start generated code]*/
1477 
1478 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1479 _elementtree_Element_itertext_impl(ElementObject *self)
1480 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1481 {
1482     return create_elementiter(self, Py_None, 1);
1483 }
1484 
1485 
1486 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1487 element_getitem(PyObject* self_, Py_ssize_t index)
1488 {
1489     ElementObject* self = (ElementObject*) self_;
1490 
1491     if (!self->extra || index < 0 || index >= self->extra->length) {
1492         PyErr_SetString(
1493             PyExc_IndexError,
1494             "child index out of range"
1495             );
1496         return NULL;
1497     }
1498 
1499     Py_INCREF(self->extra->children[index]);
1500     return self->extra->children[index];
1501 }
1502 
1503 /*[clinic input]
1504 _elementtree.Element.insert
1505 
1506     index: Py_ssize_t
1507     subelement: object(subclass_of='&Element_Type')
1508     /
1509 
1510 [clinic start generated code]*/
1511 
1512 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1513 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1514                                  PyObject *subelement)
1515 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1516 {
1517     Py_ssize_t i;
1518 
1519     if (!self->extra) {
1520         if (create_extra(self, NULL) < 0)
1521             return NULL;
1522     }
1523 
1524     if (index < 0) {
1525         index += self->extra->length;
1526         if (index < 0)
1527             index = 0;
1528     }
1529     if (index > self->extra->length)
1530         index = self->extra->length;
1531 
1532     if (element_resize(self, 1) < 0)
1533         return NULL;
1534 
1535     for (i = self->extra->length; i > index; i--)
1536         self->extra->children[i] = self->extra->children[i-1];
1537 
1538     Py_INCREF(subelement);
1539     self->extra->children[index] = subelement;
1540 
1541     self->extra->length++;
1542 
1543     Py_RETURN_NONE;
1544 }
1545 
1546 /*[clinic input]
1547 _elementtree.Element.items
1548 
1549 [clinic start generated code]*/
1550 
1551 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1552 _elementtree_Element_items_impl(ElementObject *self)
1553 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1554 {
1555     if (!self->extra || self->extra->attrib == Py_None)
1556         return PyList_New(0);
1557 
1558     return PyDict_Items(self->extra->attrib);
1559 }
1560 
1561 /*[clinic input]
1562 _elementtree.Element.keys
1563 
1564 [clinic start generated code]*/
1565 
1566 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1567 _elementtree_Element_keys_impl(ElementObject *self)
1568 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1569 {
1570     if (!self->extra || self->extra->attrib == Py_None)
1571         return PyList_New(0);
1572 
1573     return PyDict_Keys(self->extra->attrib);
1574 }
1575 
1576 static Py_ssize_t
element_length(ElementObject * self)1577 element_length(ElementObject* self)
1578 {
1579     if (!self->extra)
1580         return 0;
1581 
1582     return self->extra->length;
1583 }
1584 
1585 /*[clinic input]
1586 _elementtree.Element.makeelement
1587 
1588     tag: object
1589     attrib: object
1590     /
1591 
1592 [clinic start generated code]*/
1593 
1594 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1595 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1596                                       PyObject *attrib)
1597 /*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
1598 {
1599     PyObject* elem;
1600 
1601     attrib = PyDict_Copy(attrib);
1602     if (!attrib)
1603         return NULL;
1604 
1605     elem = create_new_element(tag, attrib);
1606 
1607     Py_DECREF(attrib);
1608 
1609     return elem;
1610 }
1611 
1612 /*[clinic input]
1613 _elementtree.Element.remove
1614 
1615     subelement: object(subclass_of='&Element_Type')
1616     /
1617 
1618 [clinic start generated code]*/
1619 
1620 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1621 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1622 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1623 {
1624     Py_ssize_t i;
1625     int rc;
1626     PyObject *found;
1627 
1628     if (!self->extra) {
1629         /* element has no children, so raise exception */
1630         PyErr_SetString(
1631             PyExc_ValueError,
1632             "list.remove(x): x not in list"
1633             );
1634         return NULL;
1635     }
1636 
1637     for (i = 0; i < self->extra->length; i++) {
1638         if (self->extra->children[i] == subelement)
1639             break;
1640         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1641         if (rc > 0)
1642             break;
1643         if (rc < 0)
1644             return NULL;
1645     }
1646 
1647     if (i >= self->extra->length) {
1648         /* subelement is not in children, so raise exception */
1649         PyErr_SetString(
1650             PyExc_ValueError,
1651             "list.remove(x): x not in list"
1652             );
1653         return NULL;
1654     }
1655 
1656     found = self->extra->children[i];
1657 
1658     self->extra->length--;
1659     for (; i < self->extra->length; i++)
1660         self->extra->children[i] = self->extra->children[i+1];
1661 
1662     Py_DECREF(found);
1663     Py_RETURN_NONE;
1664 }
1665 
1666 static PyObject*
element_repr(ElementObject * self)1667 element_repr(ElementObject* self)
1668 {
1669     int status;
1670 
1671     if (self->tag == NULL)
1672         return PyUnicode_FromFormat("<Element at %p>", self);
1673 
1674     status = Py_ReprEnter((PyObject *)self);
1675     if (status == 0) {
1676         PyObject *res;
1677         res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1678         Py_ReprLeave((PyObject *)self);
1679         return res;
1680     }
1681     if (status > 0)
1682         PyErr_Format(PyExc_RuntimeError,
1683                      "reentrant call inside %s.__repr__",
1684                      Py_TYPE(self)->tp_name);
1685     return NULL;
1686 }
1687 
1688 /*[clinic input]
1689 _elementtree.Element.set
1690 
1691     key: object
1692     value: object
1693     /
1694 
1695 [clinic start generated code]*/
1696 
1697 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1698 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1699                               PyObject *value)
1700 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1701 {
1702     PyObject* attrib;
1703 
1704     if (!self->extra) {
1705         if (create_extra(self, NULL) < 0)
1706             return NULL;
1707     }
1708 
1709     attrib = element_get_attrib(self);
1710     if (!attrib)
1711         return NULL;
1712 
1713     if (PyDict_SetItem(attrib, key, value) < 0)
1714         return NULL;
1715 
1716     Py_RETURN_NONE;
1717 }
1718 
1719 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1720 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1721 {
1722     ElementObject* self = (ElementObject*) self_;
1723     Py_ssize_t i;
1724     PyObject* old;
1725 
1726     if (!self->extra || index < 0 || index >= self->extra->length) {
1727         PyErr_SetString(
1728             PyExc_IndexError,
1729             "child assignment index out of range");
1730         return -1;
1731     }
1732 
1733     old = self->extra->children[index];
1734 
1735     if (item) {
1736         Py_INCREF(item);
1737         self->extra->children[index] = item;
1738     } else {
1739         self->extra->length--;
1740         for (i = index; i < self->extra->length; i++)
1741             self->extra->children[i] = self->extra->children[i+1];
1742     }
1743 
1744     Py_DECREF(old);
1745 
1746     return 0;
1747 }
1748 
1749 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1750 element_subscr(PyObject* self_, PyObject* item)
1751 {
1752     ElementObject* self = (ElementObject*) self_;
1753 
1754     if (PyIndex_Check(item)) {
1755         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1756 
1757         if (i == -1 && PyErr_Occurred()) {
1758             return NULL;
1759         }
1760         if (i < 0 && self->extra)
1761             i += self->extra->length;
1762         return element_getitem(self_, i);
1763     }
1764     else if (PySlice_Check(item)) {
1765         Py_ssize_t start, stop, step, slicelen, i;
1766         size_t cur;
1767         PyObject* list;
1768 
1769         if (!self->extra)
1770             return PyList_New(0);
1771 
1772         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1773             return NULL;
1774         }
1775         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1776                                          step);
1777 
1778         if (slicelen <= 0)
1779             return PyList_New(0);
1780         else {
1781             list = PyList_New(slicelen);
1782             if (!list)
1783                 return NULL;
1784 
1785             for (cur = start, i = 0; i < slicelen;
1786                  cur += step, i++) {
1787                 PyObject* item = self->extra->children[cur];
1788                 Py_INCREF(item);
1789                 PyList_SET_ITEM(list, i, item);
1790             }
1791 
1792             return list;
1793         }
1794     }
1795     else {
1796         PyErr_SetString(PyExc_TypeError,
1797                 "element indices must be integers");
1798         return NULL;
1799     }
1800 }
1801 
1802 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1803 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1804 {
1805     ElementObject* self = (ElementObject*) self_;
1806 
1807     if (PyIndex_Check(item)) {
1808         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1809 
1810         if (i == -1 && PyErr_Occurred()) {
1811             return -1;
1812         }
1813         if (i < 0 && self->extra)
1814             i += self->extra->length;
1815         return element_setitem(self_, i, value);
1816     }
1817     else if (PySlice_Check(item)) {
1818         Py_ssize_t start, stop, step, slicelen, newlen, i;
1819         size_t cur;
1820 
1821         PyObject* recycle = NULL;
1822         PyObject* seq;
1823 
1824         if (!self->extra) {
1825             if (create_extra(self, NULL) < 0)
1826                 return -1;
1827         }
1828 
1829         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1830             return -1;
1831         }
1832         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1833                                          step);
1834 
1835         if (value == NULL) {
1836             /* Delete slice */
1837             size_t cur;
1838             Py_ssize_t i;
1839 
1840             if (slicelen <= 0)
1841                 return 0;
1842 
1843             /* Since we're deleting, the direction of the range doesn't matter,
1844              * so for simplicity make it always ascending.
1845             */
1846             if (step < 0) {
1847                 stop = start + 1;
1848                 start = stop + step * (slicelen - 1) - 1;
1849                 step = -step;
1850             }
1851 
1852             assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1853 
1854             /* recycle is a list that will contain all the children
1855              * scheduled for removal.
1856             */
1857             if (!(recycle = PyList_New(slicelen))) {
1858                 return -1;
1859             }
1860 
1861             /* This loop walks over all the children that have to be deleted,
1862              * with cur pointing at them. num_moved is the amount of children
1863              * until the next deleted child that have to be "shifted down" to
1864              * occupy the deleted's places.
1865              * Note that in the ith iteration, shifting is done i+i places down
1866              * because i children were already removed.
1867             */
1868             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1869                 /* Compute how many children have to be moved, clipping at the
1870                  * list end.
1871                 */
1872                 Py_ssize_t num_moved = step - 1;
1873                 if (cur + step >= (size_t)self->extra->length) {
1874                     num_moved = self->extra->length - cur - 1;
1875                 }
1876 
1877                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1878 
1879                 memmove(
1880                     self->extra->children + cur - i,
1881                     self->extra->children + cur + 1,
1882                     num_moved * sizeof(PyObject *));
1883             }
1884 
1885             /* Leftover "tail" after the last removed child */
1886             cur = start + (size_t)slicelen * step;
1887             if (cur < (size_t)self->extra->length) {
1888                 memmove(
1889                     self->extra->children + cur - slicelen,
1890                     self->extra->children + cur,
1891                     (self->extra->length - cur) * sizeof(PyObject *));
1892             }
1893 
1894             self->extra->length -= slicelen;
1895 
1896             /* Discard the recycle list with all the deleted sub-elements */
1897             Py_DECREF(recycle);
1898             return 0;
1899         }
1900 
1901         /* A new slice is actually being assigned */
1902         seq = PySequence_Fast(value, "");
1903         if (!seq) {
1904             PyErr_Format(
1905                 PyExc_TypeError,
1906                 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1907                 );
1908             return -1;
1909         }
1910         newlen = PySequence_Fast_GET_SIZE(seq);
1911 
1912         if (step !=  1 && newlen != slicelen)
1913         {
1914             Py_DECREF(seq);
1915             PyErr_Format(PyExc_ValueError,
1916                 "attempt to assign sequence of size %zd "
1917                 "to extended slice of size %zd",
1918                 newlen, slicelen
1919                 );
1920             return -1;
1921         }
1922 
1923         /* Resize before creating the recycle bin, to prevent refleaks. */
1924         if (newlen > slicelen) {
1925             if (element_resize(self, newlen - slicelen) < 0) {
1926                 Py_DECREF(seq);
1927                 return -1;
1928             }
1929         }
1930 
1931         if (slicelen > 0) {
1932             /* to avoid recursive calls to this method (via decref), move
1933                old items to the recycle bin here, and get rid of them when
1934                we're done modifying the element */
1935             recycle = PyList_New(slicelen);
1936             if (!recycle) {
1937                 Py_DECREF(seq);
1938                 return -1;
1939             }
1940             for (cur = start, i = 0; i < slicelen;
1941                  cur += step, i++)
1942                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1943         }
1944 
1945         if (newlen < slicelen) {
1946             /* delete slice */
1947             for (i = stop; i < self->extra->length; i++)
1948                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1949         } else if (newlen > slicelen) {
1950             /* insert slice */
1951             for (i = self->extra->length-1; i >= stop; i--)
1952                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1953         }
1954 
1955         /* replace the slice */
1956         for (cur = start, i = 0; i < newlen;
1957              cur += step, i++) {
1958             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1959             Py_INCREF(element);
1960             self->extra->children[cur] = element;
1961         }
1962 
1963         self->extra->length += newlen - slicelen;
1964 
1965         Py_DECREF(seq);
1966 
1967         /* discard the recycle bin, and everything in it */
1968         Py_XDECREF(recycle);
1969 
1970         return 0;
1971     }
1972     else {
1973         PyErr_SetString(PyExc_TypeError,
1974                 "element indices must be integers");
1975         return -1;
1976     }
1977 }
1978 
1979 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1980 element_tag_getter(ElementObject *self, void *closure)
1981 {
1982     PyObject *res = self->tag;
1983     Py_INCREF(res);
1984     return res;
1985 }
1986 
1987 static PyObject*
element_text_getter(ElementObject * self,void * closure)1988 element_text_getter(ElementObject *self, void *closure)
1989 {
1990     PyObject *res = element_get_text(self);
1991     Py_XINCREF(res);
1992     return res;
1993 }
1994 
1995 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1996 element_tail_getter(ElementObject *self, void *closure)
1997 {
1998     PyObject *res = element_get_tail(self);
1999     Py_XINCREF(res);
2000     return res;
2001 }
2002 
2003 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)2004 element_attrib_getter(ElementObject *self, void *closure)
2005 {
2006     PyObject *res;
2007     if (!self->extra) {
2008         if (create_extra(self, NULL) < 0)
2009             return NULL;
2010     }
2011     res = element_get_attrib(self);
2012     Py_XINCREF(res);
2013     return res;
2014 }
2015 
/* Macro for setter validation.
 *
 * A setter receives V == NULL when the attribute is being deleted; that
 * is rejected with AttributeError and the *enclosing function* returns -1.
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and stays safe inside unbraced if/else constructs.
 * Invoke it with a trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2024 
2025 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2026 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2027 {
2028     _VALIDATE_ATTR_VALUE(value);
2029     Py_INCREF(value);
2030     Py_SETREF(self->tag, value);
2031     return 0;
2032 }
2033 
2034 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2035 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2036 {
2037     _VALIDATE_ATTR_VALUE(value);
2038     Py_INCREF(value);
2039     _set_joined_ptr(&self->text, value);
2040     return 0;
2041 }
2042 
2043 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2044 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2045 {
2046     _VALIDATE_ATTR_VALUE(value);
2047     Py_INCREF(value);
2048     _set_joined_ptr(&self->tail, value);
2049     return 0;
2050 }
2051 
2052 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2053 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2054 {
2055     _VALIDATE_ATTR_VALUE(value);
2056     if (!self->extra) {
2057         if (create_extra(self, NULL) < 0)
2058             return -1;
2059     }
2060     Py_INCREF(value);
2061     Py_SETREF(self->extra->attrib, value);
2062     return 0;
2063 }
2064 
/* Sequence protocol table for Element: length plus integer-index get and
   set.  Slots are positional; the names below follow the PySequenceMethods
   layout documented in the Python C API.  Trailing slots are left
   zero-initialised. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* was_sq_slice (unused) */
    element_setitem, /* sq_ass_item */
    0, /* was_sq_ass_slice (unused) */
};
2074 
2075 /******************************* Element iterator ****************************/
2076 
2077 /* ElementIterObject represents the iteration state over an XML element in
2078  * pre-order traversal. To keep track of which sub-element should be returned
2079  * next, a stack of parents is maintained. This is a standard stack-based
2080  * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2082  * Each stack item contains the saved parent to which we should return after
2083  * the current one is exhausted, and the next child to examine in that parent.
2084  */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    /* Element whose children are being walked; the stack entry owns a
       reference to it (taken in parent_stack_push_new). */
    ElementObject *parent;
    /* Index into parent->extra->children of the next child to visit. */
    Py_ssize_t child_index;
} ParentLocator;
2089 
/* State of an element iterator (see elementiter_next). */
typedef struct {
    PyObject_HEAD
    /* PyMem-allocated array of stack entries. */
    ParentLocator *parent_stack;
    /* Number of entries of parent_stack currently in use. */
    Py_ssize_t parent_stack_used;
    /* Number of entries parent_stack has room for. */
    Py_ssize_t parent_stack_size;
    /* Element the iteration starts from; set to NULL once it has been
       consumed by elementiter_next. */
    ElementObject *root_element;
    /* Tag compared against each element's tag; Py_None disables the
       comparison so every element is returned. */
    PyObject *sought_tag;
    /* Non-zero: yield text/tail values instead of elements. */
    int gettext;
} ElementIterObject;
2099 
2100 
2101 static void
elementiter_dealloc(ElementIterObject * it)2102 elementiter_dealloc(ElementIterObject *it)
2103 {
2104     Py_ssize_t i = it->parent_stack_used;
2105     it->parent_stack_used = 0;
2106     /* bpo-31095: UnTrack is needed before calling any callbacks */
2107     PyObject_GC_UnTrack(it);
2108     while (i--)
2109         Py_XDECREF(it->parent_stack[i].parent);
2110     PyMem_Free(it->parent_stack);
2111 
2112     Py_XDECREF(it->sought_tag);
2113     Py_XDECREF(it->root_element);
2114 
2115     PyObject_GC_Del(it);
2116 }
2117 
2118 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2119 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2120 {
2121     Py_ssize_t i = it->parent_stack_used;
2122     while (i--)
2123         Py_VISIT(it->parent_stack[i].parent);
2124 
2125     Py_VISIT(it->root_element);
2126     Py_VISIT(it->sought_tag);
2127     return 0;
2128 }
2129 
2130 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2131  */
2132 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2133 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2134 {
2135     ParentLocator *item;
2136 
2137     if (it->parent_stack_used >= it->parent_stack_size) {
2138         Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2139         ParentLocator *parent_stack = it->parent_stack;
2140         PyMem_Resize(parent_stack, ParentLocator, new_size);
2141         if (parent_stack == NULL)
2142             return -1;
2143         it->parent_stack = parent_stack;
2144         it->parent_stack_size = new_size;
2145     }
2146     item = it->parent_stack + it->parent_stack_used++;
2147     Py_INCREF(parent);
2148     item->parent = parent;
2149     item->child_index = 0;
2150     return 0;
2151 }
2152 
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * Returns a new reference to the next item, or NULL with an exception
     * set (StopIteration when the iteration is exhausted).
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Reference bookkeeping: inside the loop `elem` always carries one
     * reference owned by this function; every path below either returns
     * it to the caller or releases it.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: from here on `elem` carries the reference that the
                 * stack entry used to own. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            /* Only Element instances can be descended into; anything else
             * stored as a child is reported as lacking 'iter'.  `elem` is
             * still borrowed from the stack here, so nothing is released. */
            if (!Element_Check(extra->children[child_index])) {
                PyErr_Format(PyExc_AttributeError,
                             "'%.100s' object has no attribute 'iter'",
                             Py_TYPE(extra->children[child_index])->tp_name);
                return NULL;
            }
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* The stack takes its own reference to elem; ours is kept for the
         * value that may be returned below. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter() mode: a sought tag of None matches every element. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* No match (rc == 0) or comparison error (rc < 0). */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* itertext() mode: `text` was obtained from `elem` without taking a
         * new reference.  NULL is treated as an error from the helper. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take our own reference before dropping elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            /* Only values that test true are yielded; false ones are
             * skipped and the loop continues. */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* Not reached: the loop above only exits through return statements. */
    return NULL;
}
2263 
2264 
/* Type object of the iterator instances built by create_elementiter().
   Instances take part in cyclic GC (Py_TPFLAGS_HAVE_GC with
   elementiter_traverse), are their own iterator (PyObject_SelfIter) and
   produce values through elementiter_next. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2308 
2309 #define INIT_PARENT_STACK_SIZE 8
2310 
2311 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2312 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2313 {
2314     ElementIterObject *it;
2315 
2316     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2317     if (!it)
2318         return NULL;
2319 
2320     Py_INCREF(tag);
2321     it->sought_tag = tag;
2322     it->gettext = gettext;
2323     Py_INCREF(self);
2324     it->root_element = self;
2325 
2326     PyObject_GC_Track(it);
2327 
2328     it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2329     if (it->parent_stack == NULL) {
2330         Py_DECREF(it);
2331         PyErr_NoMemory();
2332         return NULL;
2333     }
2334     it->parent_stack_used = 0;
2335     it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2336 
2337     return (PyObject *)it;
2338 }
2339 
2340 
2341 /* ==================================================================== */
2342 /* the tree builder type */
2343 
/* Instance layout of the TreeBuilder type.  All PyObject* members are
   visited by treebuilder_gc_traverse and released by treebuilder_gc_clear. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    /* list holding the enclosing nodes; created with 20 slots in
       treebuilder_new and only positions below `index` are meaningful */
    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* optional callable used to create nodes; NULL or None selects the
       built-in element type (see treebuilder_handle_start) */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2366 
/* True when op's type is exactly TreeBuilder_Type (subclasses do not
   match, since the type pointers are compared for identity). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
2368 
2369 /* -------------------------------------------------------------------- */
2370 /* constructor and destructor */
2371 
2372 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2373 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2374 {
2375     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2376     if (t != NULL) {
2377         t->root = NULL;
2378 
2379         Py_INCREF(Py_None);
2380         t->this = Py_None;
2381         Py_INCREF(Py_None);
2382         t->last = Py_None;
2383 
2384         t->data = NULL;
2385         t->element_factory = NULL;
2386         t->stack = PyList_New(20);
2387         if (!t->stack) {
2388             Py_DECREF(t->this);
2389             Py_DECREF(t->last);
2390             Py_DECREF((PyObject *) t);
2391             return NULL;
2392         }
2393         t->index = 0;
2394 
2395         t->events_append = NULL;
2396         t->start_event_obj = t->end_event_obj = NULL;
2397         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2398     }
2399     return (PyObject *)t;
2400 }
2401 
2402 /*[clinic input]
2403 _elementtree.TreeBuilder.__init__
2404 
2405     element_factory: object = NULL
2406 
2407 [clinic start generated code]*/
2408 
2409 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory)2410 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2411                                        PyObject *element_factory)
2412 /*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2413 {
2414     if (element_factory) {
2415         Py_INCREF(element_factory);
2416         Py_XSETREF(self->element_factory, element_factory);
2417     }
2418 
2419     return 0;
2420 }
2421 
2422 static int
treebuilder_gc_traverse(TreeBuilderObject * self,visitproc visit,void * arg)2423 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2424 {
2425     Py_VISIT(self->end_ns_event_obj);
2426     Py_VISIT(self->start_ns_event_obj);
2427     Py_VISIT(self->end_event_obj);
2428     Py_VISIT(self->start_event_obj);
2429     Py_VISIT(self->events_append);
2430     Py_VISIT(self->root);
2431     Py_VISIT(self->this);
2432     Py_VISIT(self->last);
2433     Py_VISIT(self->data);
2434     Py_VISIT(self->stack);
2435     Py_VISIT(self->element_factory);
2436     return 0;
2437 }
2438 
2439 static int
treebuilder_gc_clear(TreeBuilderObject * self)2440 treebuilder_gc_clear(TreeBuilderObject *self)
2441 {
2442     Py_CLEAR(self->end_ns_event_obj);
2443     Py_CLEAR(self->start_ns_event_obj);
2444     Py_CLEAR(self->end_event_obj);
2445     Py_CLEAR(self->start_event_obj);
2446     Py_CLEAR(self->events_append);
2447     Py_CLEAR(self->stack);
2448     Py_CLEAR(self->data);
2449     Py_CLEAR(self->last);
2450     Py_CLEAR(self->this);
2451     Py_CLEAR(self->element_factory);
2452     Py_CLEAR(self->root);
2453     return 0;
2454 }
2455 
2456 static void
treebuilder_dealloc(TreeBuilderObject * self)2457 treebuilder_dealloc(TreeBuilderObject *self)
2458 {
2459     PyObject_GC_UnTrack(self);
2460     treebuilder_gc_clear(self);
2461     Py_TYPE(self)->tp_free((PyObject *)self);
2462 }
2463 
2464 /* -------------------------------------------------------------------- */
2465 /* helpers for handling of arbitrary element-like objects */
2466 
2467 static int
treebuilder_set_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2468 treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
2469                                      PyObject **dest, _Py_Identifier *name)
2470 {
2471     if (Element_CheckExact(element)) {
2472         PyObject *tmp = JOIN_OBJ(*dest);
2473         *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2474         *data = NULL;
2475         Py_DECREF(tmp);
2476         return 0;
2477     }
2478     else {
2479         PyObject *joined = list_join(*data);
2480         int r;
2481         if (joined == NULL)
2482             return -1;
2483         r = _PyObject_SetAttrId(element, name, joined);
2484         Py_DECREF(joined);
2485         if (r < 0)
2486             return -1;
2487         Py_CLEAR(*data);
2488         return 0;
2489     }
2490 }
2491 
2492 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2493 treebuilder_flush_data(TreeBuilderObject* self)
2494 {
2495     PyObject *element = self->last;
2496 
2497     if (!self->data) {
2498         return 0;
2499     }
2500 
2501     if (self->this == element) {
2502         _Py_IDENTIFIER(text);
2503         return treebuilder_set_element_text_or_tail(
2504                 element, &self->data,
2505                 &((ElementObject *) element)->text, &PyId_text);
2506     }
2507     else {
2508         _Py_IDENTIFIER(tail);
2509         return treebuilder_set_element_text_or_tail(
2510                 element, &self->data,
2511                 &((ElementObject *) element)->tail, &PyId_tail);
2512     }
2513 }
2514 
2515 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2516 treebuilder_add_subelement(PyObject *element, PyObject *child)
2517 {
2518     _Py_IDENTIFIER(append);
2519     if (Element_CheckExact(element)) {
2520         ElementObject *elem = (ElementObject *) element;
2521         return element_add_subelement(elem, child);
2522     }
2523     else {
2524         PyObject *res;
2525         res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
2526         if (res == NULL)
2527             return -1;
2528         Py_DECREF(res);
2529         return 0;
2530     }
2531 }
2532 
2533 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2534 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2535                          PyObject *node)
2536 {
2537     if (action != NULL) {
2538         PyObject *res;
2539         PyObject *event = PyTuple_Pack(2, action, node);
2540         if (event == NULL)
2541             return -1;
2542         res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2543         Py_DECREF(event);
2544         if (res == NULL)
2545             return -1;
2546         Py_DECREF(res);
2547     }
2548     return 0;
2549 }
2550 
2551 /* -------------------------------------------------------------------- */
2552 /* handlers */
2553 
2554 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2555 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2556                          PyObject* attrib)
2557 {
2558     PyObject* node;
2559     PyObject* this;
2560     elementtreestate *st = ET_STATE_GLOBAL;
2561 
2562     if (treebuilder_flush_data(self) < 0) {
2563         return NULL;
2564     }
2565 
2566     if (!self->element_factory || self->element_factory == Py_None) {
2567         node = create_new_element(tag, attrib);
2568     } else if (attrib == Py_None) {
2569         attrib = PyDict_New();
2570         if (!attrib)
2571             return NULL;
2572         node = PyObject_CallFunctionObjArgs(self->element_factory,
2573                                             tag, attrib, NULL);
2574         Py_DECREF(attrib);
2575     }
2576     else {
2577         node = PyObject_CallFunctionObjArgs(self->element_factory,
2578                                             tag, attrib, NULL);
2579     }
2580     if (!node) {
2581         return NULL;
2582     }
2583 
2584     this = self->this;
2585 
2586     if (this != Py_None) {
2587         if (treebuilder_add_subelement(this, node) < 0)
2588             goto error;
2589     } else {
2590         if (self->root) {
2591             PyErr_SetString(
2592                 st->parseerror_obj,
2593                 "multiple elements on top level"
2594                 );
2595             goto error;
2596         }
2597         Py_INCREF(node);
2598         self->root = node;
2599     }
2600 
2601     if (self->index < PyList_GET_SIZE(self->stack)) {
2602         if (PyList_SetItem(self->stack, self->index, this) < 0)
2603             goto error;
2604         Py_INCREF(this);
2605     } else {
2606         if (PyList_Append(self->stack, this) < 0)
2607             goto error;
2608     }
2609     self->index++;
2610 
2611     Py_INCREF(node);
2612     Py_SETREF(self->this, node);
2613     Py_INCREF(node);
2614     Py_SETREF(self->last, node);
2615 
2616     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2617         goto error;
2618 
2619     return node;
2620 
2621   error:
2622     Py_DECREF(node);
2623     return NULL;
2624 }
2625 
2626 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2627 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2628 {
2629     if (!self->data) {
2630         if (self->last == Py_None) {
2631             /* ignore calls to data before the first call to start */
2632             Py_RETURN_NONE;
2633         }
2634         /* store the first item as is */
2635         Py_INCREF(data); self->data = data;
2636     } else {
2637         /* more than one item; use a list to collect items */
2638         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2639             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2640             /* XXX this code path unused in Python 3? */
2641             /* expat often generates single character data sections; handle
2642                the most common case by resizing the existing string... */
2643             Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2644             if (_PyBytes_Resize(&self->data, size + 1) < 0)
2645                 return NULL;
2646             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2647         } else if (PyList_CheckExact(self->data)) {
2648             if (PyList_Append(self->data, data) < 0)
2649                 return NULL;
2650         } else {
2651             PyObject* list = PyList_New(2);
2652             if (!list)
2653                 return NULL;
2654             PyList_SET_ITEM(list, 0, self->data);
2655             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2656             self->data = list;
2657         }
2658     }
2659 
2660     Py_RETURN_NONE;
2661 }
2662 
2663 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2664 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2665 {
2666     PyObject* item;
2667 
2668     if (treebuilder_flush_data(self) < 0) {
2669         return NULL;
2670     }
2671 
2672     if (self->index == 0) {
2673         PyErr_SetString(
2674             PyExc_IndexError,
2675             "pop from empty stack"
2676             );
2677         return NULL;
2678     }
2679 
2680     item = self->last;
2681     self->last = self->this;
2682     self->index--;
2683     self->this = PyList_GET_ITEM(self->stack, self->index);
2684     Py_INCREF(self->this);
2685     Py_DECREF(item);
2686 
2687     if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2688         return NULL;
2689 
2690     Py_INCREF(self->last);
2691     return (PyObject*) self->last;
2692 }
2693 
2694 /* -------------------------------------------------------------------- */
2695 /* methods (in alphabetical order) */
2696 
2697 /*[clinic input]
2698 _elementtree.TreeBuilder.data
2699 
2700     data: object
2701     /
2702 
2703 [clinic start generated code]*/
2704 
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* TreeBuilder.data(data): thin wrapper that forwards to the internal
       handler and returns its result unchanged. */
    return treebuilder_handle_data(self, data);
}
2711 
2712 /*[clinic input]
2713 _elementtree.TreeBuilder.end
2714 
2715     tag: object
2716     /
2717 
2718 [clinic start generated code]*/
2719 
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* TreeBuilder.end(tag): thin wrapper that forwards to the internal
       handler and returns its result unchanged. */
    return treebuilder_handle_end(self, tag);
}
2726 
2727 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)2728 treebuilder_done(TreeBuilderObject* self)
2729 {
2730     PyObject* res;
2731 
2732     /* FIXME: check stack size? */
2733 
2734     if (self->root)
2735         res = self->root;
2736     else
2737         res = Py_None;
2738 
2739     Py_INCREF(res);
2740     return res;
2741 }
2742 
2743 /*[clinic input]
2744 _elementtree.TreeBuilder.close
2745 
2746 [clinic start generated code]*/
2747 
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* TreeBuilder.close(): thin wrapper around treebuilder_done(), which
       yields the root node or None. */
    return treebuilder_done(self);
}
2754 
2755 /*[clinic input]
2756 _elementtree.TreeBuilder.start
2757 
2758     tag: object
2759     attrs: object = None
2760     /
2761 
2762 [clinic start generated code]*/
2763 
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
{
    /* TreeBuilder.start(tag, attrs): thin wrapper that forwards to the
       internal handler and returns its result unchanged. */
    return treebuilder_handle_start(self, tag, attrs);
}
2771 
2772 /* ==================================================================== */
2773 /* the expat interface */
2774 
2775 #include "expat.h"
2776 #include "pyexpat.h"
2777 
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat function through the cached dispatch table, e.g.
   EXPAT(ErrorString)(code).  expat_capi must have been set beforehand. */
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite that routes expat's malloc/realloc/free requests
   to the Python object allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2786 
/* Instance layout of the XML parser type. */
typedef struct {
    PyObject_HEAD

    /* the underlying expat parser handle */
    XML_Parser parser;

    /* NOTE(review): presumably the object receiving parse events and the
       entity mapping exposed to Python -- confirm against the
       constructor, which is not part of this section. */
    PyObject *target;
    PyObject *entity;

    /* cache filled by makeuniversal(): raw UTF-8 name (bytes) mapped to
       the decoded universal name (str) */
    PyObject *names;

    /* NOTE(review): the handle_* members look like cached callables for
       the corresponding target methods -- confirm where they are set. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
2808 
/* Forward declarations for the XMLParser.doctype entry points; their
   definitions are not in this part of the file. */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
2814 
2815 /* helpers */
2816 
2817 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)2818 makeuniversal(XMLParserObject* self, const char* string)
2819 {
2820     /* convert a UTF-8 tag/attribute name from the expat parser
2821        to a universal name string */
2822 
2823     Py_ssize_t size = (Py_ssize_t) strlen(string);
2824     PyObject* key;
2825     PyObject* value;
2826 
2827     /* look the 'raw' name up in the names dictionary */
2828     key = PyBytes_FromStringAndSize(string, size);
2829     if (!key)
2830         return NULL;
2831 
2832     value = PyDict_GetItem(self->names, key);
2833 
2834     if (value) {
2835         Py_INCREF(value);
2836     } else {
2837         /* new name.  convert to universal name, and decode as
2838            necessary */
2839 
2840         PyObject* tag;
2841         char* p;
2842         Py_ssize_t i;
2843 
2844         /* look for namespace separator */
2845         for (i = 0; i < size; i++)
2846             if (string[i] == '}')
2847                 break;
2848         if (i != size) {
2849             /* convert to universal name */
2850             tag = PyBytes_FromStringAndSize(NULL, size+1);
2851             if (tag == NULL) {
2852                 Py_DECREF(key);
2853                 return NULL;
2854             }
2855             p = PyBytes_AS_STRING(tag);
2856             p[0] = '{';
2857             memcpy(p+1, string, size);
2858             size++;
2859         } else {
2860             /* plain name; use key as tag */
2861             Py_INCREF(key);
2862             tag = key;
2863         }
2864 
2865         /* decode universal name */
2866         p = PyBytes_AS_STRING(tag);
2867         value = PyUnicode_DecodeUTF8(p, size, "strict");
2868         Py_DECREF(tag);
2869         if (!value) {
2870             Py_DECREF(key);
2871             return NULL;
2872         }
2873 
2874         /* add to names dictionary */
2875         if (PyDict_SetItem(self->names, key, value) < 0) {
2876             Py_DECREF(key);
2877             Py_DECREF(value);
2878             return NULL;
2879         }
2880     }
2881 
2882     Py_DECREF(key);
2883     return value;
2884 }
2885 
2886 /* Set the ParseError exception with the given parameters.
2887  * If message is not NULL, it's used as the error string. Otherwise, the
2888  * message string is the default for the given error_code.
2889 */
2890 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)2891 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2892                 const char *message)
2893 {
2894     PyObject *errmsg, *error, *position, *code;
2895     elementtreestate *st = ET_STATE_GLOBAL;
2896 
2897     errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
2898                 message ? message : EXPAT(ErrorString)(error_code),
2899                 line, column);
2900     if (errmsg == NULL)
2901         return;
2902 
2903     error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
2904     Py_DECREF(errmsg);
2905     if (!error)
2906         return;
2907 
2908     /* Add code and position attributes */
2909     code = PyLong_FromLong((long)error_code);
2910     if (!code) {
2911         Py_DECREF(error);
2912         return;
2913     }
2914     if (PyObject_SetAttrString(error, "code", code) == -1) {
2915         Py_DECREF(error);
2916         Py_DECREF(code);
2917         return;
2918     }
2919     Py_DECREF(code);
2920 
2921     position = Py_BuildValue("(nn)", line, column);
2922     if (!position) {
2923         Py_DECREF(error);
2924         return;
2925     }
2926     if (PyObject_SetAttrString(error, "position", position) == -1) {
2927         Py_DECREF(error);
2928         Py_DECREF(position);
2929         return;
2930     }
2931     Py_DECREF(position);
2932 
2933     PyErr_SetObject(st->parseerror_obj, error);
2934     Py_DECREF(error);
2935 }
2936 
2937 /* -------------------------------------------------------------------- */
2938 /* handlers */
2939 
2940 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)2941 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2942                       int data_len)
2943 {
2944     PyObject* key;
2945     PyObject* value;
2946     PyObject* res;
2947 
2948     if (data_len < 2 || data_in[0] != '&')
2949         return;
2950 
2951     if (PyErr_Occurred())
2952         return;
2953 
2954     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
2955     if (!key)
2956         return;
2957 
2958     value = PyDict_GetItem(self->entity, key);
2959 
2960     if (value) {
2961         if (TreeBuilder_CheckExact(self->target))
2962             res = treebuilder_handle_data(
2963                 (TreeBuilderObject*) self->target, value
2964                 );
2965         else if (self->handle_data)
2966             res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
2967         else
2968             res = NULL;
2969         Py_XDECREF(res);
2970     } else if (!PyErr_Occurred()) {
2971         /* Report the first error, not the last */
2972         char message[128] = "undefined entity ";
2973         strncat(message, data_in, data_len < 100?data_len:100);
2974         expat_set_error(
2975             XML_ERROR_UNDEFINED_ENTITY,
2976             EXPAT(GetErrorLineNumber)(self->parser),
2977             EXPAT(GetErrorColumnNumber)(self->parser),
2978             message
2979             );
2980     }
2981 
2982     Py_DECREF(key);
2983 }
2984 
2985 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)2986 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2987                     const XML_Char **attrib_in)
2988 {
2989     PyObject* res;
2990     PyObject* tag;
2991     PyObject* attrib;
2992     int ok;
2993 
2994     if (PyErr_Occurred())
2995         return;
2996 
2997     /* tag name */
2998     tag = makeuniversal(self, tag_in);
2999     if (!tag)
3000         return; /* parser will look for errors */
3001 
3002     /* attributes */
3003     if (attrib_in[0]) {
3004         attrib = PyDict_New();
3005         if (!attrib) {
3006             Py_DECREF(tag);
3007             return;
3008         }
3009         while (attrib_in[0] && attrib_in[1]) {
3010             PyObject* key = makeuniversal(self, attrib_in[0]);
3011             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3012             if (!key || !value) {
3013                 Py_XDECREF(value);
3014                 Py_XDECREF(key);
3015                 Py_DECREF(attrib);
3016                 Py_DECREF(tag);
3017                 return;
3018             }
3019             ok = PyDict_SetItem(attrib, key, value);
3020             Py_DECREF(value);
3021             Py_DECREF(key);
3022             if (ok < 0) {
3023                 Py_DECREF(attrib);
3024                 Py_DECREF(tag);
3025                 return;
3026             }
3027             attrib_in += 2;
3028         }
3029     } else {
3030         Py_INCREF(Py_None);
3031         attrib = Py_None;
3032     }
3033 
3034     if (TreeBuilder_CheckExact(self->target)) {
3035         /* shortcut */
3036         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3037                                        tag, attrib);
3038     }
3039     else if (self->handle_start) {
3040         if (attrib == Py_None) {
3041             Py_DECREF(attrib);
3042             attrib = PyDict_New();
3043             if (!attrib) {
3044                 Py_DECREF(tag);
3045                 return;
3046             }
3047         }
3048         res = PyObject_CallFunctionObjArgs(self->handle_start,
3049                                            tag, attrib, NULL);
3050     } else
3051         res = NULL;
3052 
3053     Py_DECREF(tag);
3054     Py_DECREF(attrib);
3055 
3056     Py_XDECREF(res);
3057 }
3058 
3059 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3060 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3061                    int data_len)
3062 {
3063     PyObject* data;
3064     PyObject* res;
3065 
3066     if (PyErr_Occurred())
3067         return;
3068 
3069     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3070     if (!data)
3071         return; /* parser will look for errors */
3072 
3073     if (TreeBuilder_CheckExact(self->target))
3074         /* shortcut */
3075         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3076     else if (self->handle_data)
3077         res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
3078     else
3079         res = NULL;
3080 
3081     Py_DECREF(data);
3082 
3083     Py_XDECREF(res);
3084 }
3085 
3086 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3087 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3088 {
3089     PyObject* tag;
3090     PyObject* res = NULL;
3091 
3092     if (PyErr_Occurred())
3093         return;
3094 
3095     if (TreeBuilder_CheckExact(self->target))
3096         /* shortcut */
3097         /* the standard tree builder doesn't look at the end tag */
3098         res = treebuilder_handle_end(
3099             (TreeBuilderObject*) self->target, Py_None
3100             );
3101     else if (self->handle_end) {
3102         tag = makeuniversal(self, tag_in);
3103         if (tag) {
3104             res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
3105             Py_DECREF(tag);
3106         }
3107     }
3108 
3109     Py_XDECREF(res);
3110 }
3111 
3112 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix,const XML_Char * uri)3113 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3114                        const XML_Char *uri)
3115 {
3116     TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3117     PyObject *parcel;
3118 
3119     if (PyErr_Occurred())
3120         return;
3121 
3122     if (!target->events_append || !target->start_ns_event_obj)
3123         return;
3124 
3125     if (!uri)
3126         uri = "";
3127     if (!prefix)
3128         prefix = "";
3129 
3130     parcel = Py_BuildValue("ss", prefix, uri);
3131     if (!parcel)
3132         return;
3133     treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3134     Py_DECREF(parcel);
3135 }
3136 
3137 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3138 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3139 {
3140     TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3141 
3142     if (PyErr_Occurred())
3143         return;
3144 
3145     if (!target->events_append)
3146         return;
3147 
3148     treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
3149 }
3150 
3151 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3152 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3153 {
3154     PyObject* comment;
3155     PyObject* res;
3156 
3157     if (PyErr_Occurred())
3158         return;
3159 
3160     if (self->handle_comment) {
3161         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3162         if (comment) {
3163             res = PyObject_CallFunctionObjArgs(self->handle_comment,
3164                                                comment, NULL);
3165             Py_XDECREF(res);
3166             Py_DECREF(comment);
3167         }
3168     }
3169 }
3170 
3171 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3172 expat_start_doctype_handler(XMLParserObject *self,
3173                             const XML_Char *doctype_name,
3174                             const XML_Char *sysid,
3175                             const XML_Char *pubid,
3176                             int has_internal_subset)
3177 {
3178     PyObject *self_pyobj = (PyObject *)self;
3179     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3180     PyObject *parser_doctype = NULL;
3181     PyObject *res = NULL;
3182 
3183     if (PyErr_Occurred())
3184         return;
3185 
3186     doctype_name_obj = makeuniversal(self, doctype_name);
3187     if (!doctype_name_obj)
3188         return;
3189 
3190     if (sysid) {
3191         sysid_obj = makeuniversal(self, sysid);
3192         if (!sysid_obj) {
3193             Py_DECREF(doctype_name_obj);
3194             return;
3195         }
3196     } else {
3197         Py_INCREF(Py_None);
3198         sysid_obj = Py_None;
3199     }
3200 
3201     if (pubid) {
3202         pubid_obj = makeuniversal(self, pubid);
3203         if (!pubid_obj) {
3204             Py_DECREF(doctype_name_obj);
3205             Py_DECREF(sysid_obj);
3206             return;
3207         }
3208     } else {
3209         Py_INCREF(Py_None);
3210         pubid_obj = Py_None;
3211     }
3212 
3213     /* If the target has a handler for doctype, call it. */
3214     if (self->handle_doctype) {
3215         res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3216                                            doctype_name_obj, pubid_obj,
3217                                            sysid_obj, NULL);
3218         Py_CLEAR(res);
3219     }
3220     else {
3221         /* Now see if the parser itself has a doctype method. If yes and it's
3222          * a custom method, call it but warn about deprecation. If it's only
3223          * the vanilla XMLParser method, do nothing.
3224          */
3225         parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3226         if (parser_doctype &&
3227             !(PyCFunction_Check(parser_doctype) &&
3228               PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3229               PyCFunction_GET_FUNCTION(parser_doctype) ==
3230                     (PyCFunction) _elementtree_XMLParser_doctype)) {
3231             res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3232                                                       pubid_obj, sysid_obj);
3233             if (!res)
3234                 goto clear;
3235             Py_DECREF(res);
3236             res = PyObject_CallFunctionObjArgs(parser_doctype,
3237                                                doctype_name_obj, pubid_obj,
3238                                                sysid_obj, NULL);
3239             Py_CLEAR(res);
3240         }
3241     }
3242 
3243 clear:
3244     Py_XDECREF(parser_doctype);
3245     Py_DECREF(doctype_name_obj);
3246     Py_DECREF(pubid_obj);
3247     Py_DECREF(sysid_obj);
3248 }
3249 
3250 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3251 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3252                  const XML_Char* data_in)
3253 {
3254     PyObject* target;
3255     PyObject* data;
3256     PyObject* res;
3257 
3258     if (PyErr_Occurred())
3259         return;
3260 
3261     if (self->handle_pi) {
3262         target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3263         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3264         if (target && data) {
3265             res = PyObject_CallFunctionObjArgs(self->handle_pi,
3266                                                target, data, NULL);
3267             Py_XDECREF(res);
3268             Py_DECREF(data);
3269             Py_DECREF(target);
3270         } else {
3271             Py_XDECREF(data);
3272             Py_XDECREF(target);
3273         }
3274     }
3275 }
3276 
3277 /* -------------------------------------------------------------------- */
3278 
3279 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3280 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3281 {
3282     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3283     if (self) {
3284         self->parser = NULL;
3285         self->target = self->entity = self->names = NULL;
3286         self->handle_start = self->handle_data = self->handle_end = NULL;
3287         self->handle_comment = self->handle_pi = self->handle_close = NULL;
3288         self->handle_doctype = NULL;
3289     }
3290     return (PyObject *)self;
3291 }
3292 
3293 static int
ignore_attribute_error(PyObject * value)3294 ignore_attribute_error(PyObject *value)
3295 {
3296     if (value == NULL) {
3297         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3298             return -1;
3299         }
3300         PyErr_Clear();
3301     }
3302     return 0;
3303 }
3304 
3305 /*[clinic input]
3306 _elementtree.XMLParser.__init__
3307 
3308     html: object = NULL
3309     target: object = NULL
3310     encoding: str(accept={str, NoneType}) = NULL
3311 
3312 [clinic start generated code]*/
3313 
3314 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * html,PyObject * target,const char * encoding)3315 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3316                                      PyObject *target, const char *encoding)
3317 /*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
3318 {
3319     if (html != NULL) {
3320         if (PyErr_WarnEx(PyExc_DeprecationWarning,
3321                          "The html argument of XMLParser() is deprecated",
3322                          1) < 0) {
3323             return -1;
3324         }
3325     }
3326 
3327     self->entity = PyDict_New();
3328     if (!self->entity)
3329         return -1;
3330 
3331     self->names = PyDict_New();
3332     if (!self->names) {
3333         Py_CLEAR(self->entity);
3334         return -1;
3335     }
3336 
3337     self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3338     if (!self->parser) {
3339         Py_CLEAR(self->entity);
3340         Py_CLEAR(self->names);
3341         PyErr_NoMemory();
3342         return -1;
3343     }
3344     /* expat < 2.1.0 has no XML_SetHashSalt() */
3345     if (EXPAT(SetHashSalt) != NULL) {
3346         EXPAT(SetHashSalt)(self->parser,
3347                            (unsigned long)_Py_HashSecret.expat.hashsalt);
3348     }
3349 
3350     if (target) {
3351         Py_INCREF(target);
3352     } else {
3353         target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3354         if (!target) {
3355             Py_CLEAR(self->entity);
3356             Py_CLEAR(self->names);
3357             return -1;
3358         }
3359     }
3360     self->target = target;
3361 
3362     self->handle_start = PyObject_GetAttrString(target, "start");
3363     if (ignore_attribute_error(self->handle_start)) {
3364         return -1;
3365     }
3366     self->handle_data = PyObject_GetAttrString(target, "data");
3367     if (ignore_attribute_error(self->handle_data)) {
3368         return -1;
3369     }
3370     self->handle_end = PyObject_GetAttrString(target, "end");
3371     if (ignore_attribute_error(self->handle_end)) {
3372         return -1;
3373     }
3374     self->handle_comment = PyObject_GetAttrString(target, "comment");
3375     if (ignore_attribute_error(self->handle_comment)) {
3376         return -1;
3377     }
3378     self->handle_pi = PyObject_GetAttrString(target, "pi");
3379     if (ignore_attribute_error(self->handle_pi)) {
3380         return -1;
3381     }
3382     self->handle_close = PyObject_GetAttrString(target, "close");
3383     if (ignore_attribute_error(self->handle_close)) {
3384         return -1;
3385     }
3386     self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3387     if (ignore_attribute_error(self->handle_doctype)) {
3388         return -1;
3389     }
3390 
3391     /* configure parser */
3392     EXPAT(SetUserData)(self->parser, self);
3393     EXPAT(SetElementHandler)(
3394         self->parser,
3395         (XML_StartElementHandler) expat_start_handler,
3396         (XML_EndElementHandler) expat_end_handler
3397         );
3398     EXPAT(SetDefaultHandlerExpand)(
3399         self->parser,
3400         (XML_DefaultHandler) expat_default_handler
3401         );
3402     EXPAT(SetCharacterDataHandler)(
3403         self->parser,
3404         (XML_CharacterDataHandler) expat_data_handler
3405         );
3406     if (self->handle_comment)
3407         EXPAT(SetCommentHandler)(
3408             self->parser,
3409             (XML_CommentHandler) expat_comment_handler
3410             );
3411     if (self->handle_pi)
3412         EXPAT(SetProcessingInstructionHandler)(
3413             self->parser,
3414             (XML_ProcessingInstructionHandler) expat_pi_handler
3415             );
3416     EXPAT(SetStartDoctypeDeclHandler)(
3417         self->parser,
3418         (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3419         );
3420     EXPAT(SetUnknownEncodingHandler)(
3421         self->parser,
3422         EXPAT(DefaultUnknownEncodingHandler), NULL
3423         );
3424 
3425     return 0;
3426 }
3427 
3428 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3429 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3430 {
3431     Py_VISIT(self->handle_close);
3432     Py_VISIT(self->handle_pi);
3433     Py_VISIT(self->handle_comment);
3434     Py_VISIT(self->handle_end);
3435     Py_VISIT(self->handle_data);
3436     Py_VISIT(self->handle_start);
3437 
3438     Py_VISIT(self->target);
3439     Py_VISIT(self->entity);
3440     Py_VISIT(self->names);
3441 
3442     return 0;
3443 }
3444 
3445 static int
xmlparser_gc_clear(XMLParserObject * self)3446 xmlparser_gc_clear(XMLParserObject *self)
3447 {
3448     if (self->parser != NULL) {
3449         XML_Parser parser = self->parser;
3450         self->parser = NULL;
3451         EXPAT(ParserFree)(parser);
3452     }
3453 
3454     Py_CLEAR(self->handle_close);
3455     Py_CLEAR(self->handle_pi);
3456     Py_CLEAR(self->handle_comment);
3457     Py_CLEAR(self->handle_end);
3458     Py_CLEAR(self->handle_data);
3459     Py_CLEAR(self->handle_start);
3460     Py_CLEAR(self->handle_doctype);
3461 
3462     Py_CLEAR(self->target);
3463     Py_CLEAR(self->entity);
3464     Py_CLEAR(self->names);
3465 
3466     return 0;
3467 }
3468 
3469 static void
xmlparser_dealloc(XMLParserObject * self)3470 xmlparser_dealloc(XMLParserObject* self)
3471 {
3472     PyObject_GC_UnTrack(self);
3473     xmlparser_gc_clear(self);
3474     Py_TYPE(self)->tp_free((PyObject *)self);
3475 }
3476 
3477 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3478 _check_xmlparser(XMLParserObject* self)
3479 {
3480     if (self->target == NULL) {
3481         PyErr_SetString(PyExc_ValueError,
3482                         "XMLParser.__init__() wasn't called");
3483         return 0;
3484     }
3485     return 1;
3486 }
3487 
3488 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3489 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3490 {
3491     int ok;
3492 
3493     assert(!PyErr_Occurred());
3494     ok = EXPAT(Parse)(self->parser, data, data_len, final);
3495 
3496     if (PyErr_Occurred())
3497         return NULL;
3498 
3499     if (!ok) {
3500         expat_set_error(
3501             EXPAT(GetErrorCode)(self->parser),
3502             EXPAT(GetErrorLineNumber)(self->parser),
3503             EXPAT(GetErrorColumnNumber)(self->parser),
3504             NULL
3505             );
3506         return NULL;
3507     }
3508 
3509     Py_RETURN_NONE;
3510 }
3511 
3512 /*[clinic input]
3513 _elementtree.XMLParser.close
3514 
3515 [clinic start generated code]*/
3516 
3517 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3518 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3519 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3520 {
3521     /* end feeding data to parser */
3522 
3523     PyObject* res;
3524 
3525     if (!_check_xmlparser(self)) {
3526         return NULL;
3527     }
3528     res = expat_parse(self, "", 0, 1);
3529     if (!res)
3530         return NULL;
3531 
3532     if (TreeBuilder_CheckExact(self->target)) {
3533         Py_DECREF(res);
3534         return treebuilder_done((TreeBuilderObject*) self->target);
3535     }
3536     else if (self->handle_close) {
3537         Py_DECREF(res);
3538         return _PyObject_CallNoArg(self->handle_close);
3539     }
3540     else {
3541         return res;
3542     }
3543 }
3544 
3545 /*[clinic input]
3546 _elementtree.XMLParser.feed
3547 
3548     data: object
3549     /
3550 
3551 [clinic start generated code]*/
3552 
3553 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3554 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3555 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3556 {
3557     /* feed data to parser */
3558 
3559     if (!_check_xmlparser(self)) {
3560         return NULL;
3561     }
3562     if (PyUnicode_Check(data)) {
3563         Py_ssize_t data_len;
3564         const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3565         if (data_ptr == NULL)
3566             return NULL;
3567         if (data_len > INT_MAX) {
3568             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3569             return NULL;
3570         }
3571         /* Explicitly set UTF-8 encoding. Return code ignored. */
3572         (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3573         return expat_parse(self, data_ptr, (int)data_len, 0);
3574     }
3575     else {
3576         Py_buffer view;
3577         PyObject *res;
3578         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3579             return NULL;
3580         if (view.len > INT_MAX) {
3581             PyBuffer_Release(&view);
3582             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3583             return NULL;
3584         }
3585         res = expat_parse(self, view.buf, (int)view.len, 0);
3586         PyBuffer_Release(&view);
3587         return res;
3588     }
3589 }
3590 
3591 /*[clinic input]
3592 _elementtree.XMLParser._parse_whole
3593 
3594     file: object
3595     /
3596 
3597 [clinic start generated code]*/
3598 
3599 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3600 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3601 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3602 {
3603     /* (internal) parse the whole input, until end of stream */
3604     PyObject* reader;
3605     PyObject* buffer;
3606     PyObject* temp;
3607     PyObject* res;
3608 
3609     if (!_check_xmlparser(self)) {
3610         return NULL;
3611     }
3612     reader = PyObject_GetAttrString(file, "read");
3613     if (!reader)
3614         return NULL;
3615 
3616     /* read from open file object */
3617     for (;;) {
3618 
3619         buffer = PyObject_CallFunction(reader, "i", 64*1024);
3620 
3621         if (!buffer) {
3622             /* read failed (e.g. due to KeyboardInterrupt) */
3623             Py_DECREF(reader);
3624             return NULL;
3625         }
3626 
3627         if (PyUnicode_CheckExact(buffer)) {
3628             /* A unicode object is encoded into bytes using UTF-8 */
3629             if (PyUnicode_GET_LENGTH(buffer) == 0) {
3630                 Py_DECREF(buffer);
3631                 break;
3632             }
3633             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3634             Py_DECREF(buffer);
3635             if (!temp) {
3636                 /* Propagate exception from PyUnicode_AsEncodedString */
3637                 Py_DECREF(reader);
3638                 return NULL;
3639             }
3640             buffer = temp;
3641         }
3642         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3643             Py_DECREF(buffer);
3644             break;
3645         }
3646 
3647         if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3648             Py_DECREF(buffer);
3649             Py_DECREF(reader);
3650             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3651             return NULL;
3652         }
3653         res = expat_parse(
3654             self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3655             );
3656 
3657         Py_DECREF(buffer);
3658 
3659         if (!res) {
3660             Py_DECREF(reader);
3661             return NULL;
3662         }
3663         Py_DECREF(res);
3664 
3665     }
3666 
3667     Py_DECREF(reader);
3668 
3669     res = expat_parse(self, "", 0, 1);
3670 
3671     if (res && TreeBuilder_CheckExact(self->target)) {
3672         Py_DECREF(res);
3673         return treebuilder_done((TreeBuilderObject*) self->target);
3674     }
3675 
3676     return res;
3677 }
3678 
3679 /*[clinic input]
3680 _elementtree.XMLParser.doctype
3681 
3682     name: object
3683     pubid: object
3684     system: object
3685     /
3686 
3687 [clinic start generated code]*/
3688 
3689 static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject * self,PyObject * name,PyObject * pubid,PyObject * system)3690 _elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3691                                     PyObject *pubid, PyObject *system)
3692 /*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
3693 {
3694     if (PyErr_WarnEx(PyExc_DeprecationWarning,
3695                      "This method of XMLParser is deprecated.  Define"
3696                      " doctype() method on the TreeBuilder target.",
3697                      1) < 0) {
3698         return NULL;
3699     }
3700     Py_RETURN_NONE;
3701 }
3702 
3703 /*[clinic input]
3704 _elementtree.XMLParser._setevents
3705 
3706     events_queue: object
3707     events_to_report: object = None
3708     /
3709 
3710 [clinic start generated code]*/
3711 
3712 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)3713 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3714                                        PyObject *events_queue,
3715                                        PyObject *events_to_report)
3716 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
3717 {
3718     /* activate element event reporting */
3719     Py_ssize_t i;
3720     TreeBuilderObject *target;
3721     PyObject *events_append, *events_seq;
3722 
3723     if (!_check_xmlparser(self)) {
3724         return NULL;
3725     }
3726     if (!TreeBuilder_CheckExact(self->target)) {
3727         PyErr_SetString(
3728             PyExc_TypeError,
3729             "event handling only supported for ElementTree.TreeBuilder "
3730             "targets"
3731             );
3732         return NULL;
3733     }
3734 
3735     target = (TreeBuilderObject*) self->target;
3736 
3737     events_append = PyObject_GetAttrString(events_queue, "append");
3738     if (events_append == NULL)
3739         return NULL;
3740     Py_XSETREF(target->events_append, events_append);
3741 
3742     /* clear out existing events */
3743     Py_CLEAR(target->start_event_obj);
3744     Py_CLEAR(target->end_event_obj);
3745     Py_CLEAR(target->start_ns_event_obj);
3746     Py_CLEAR(target->end_ns_event_obj);
3747 
3748     if (events_to_report == Py_None) {
3749         /* default is "end" only */
3750         target->end_event_obj = PyUnicode_FromString("end");
3751         Py_RETURN_NONE;
3752     }
3753 
3754     if (!(events_seq = PySequence_Fast(events_to_report,
3755                                        "events must be a sequence"))) {
3756         return NULL;
3757     }
3758 
3759     for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
3760         PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3761         const char *event_name = NULL;
3762         if (PyUnicode_Check(event_name_obj)) {
3763             event_name = PyUnicode_AsUTF8(event_name_obj);
3764         } else if (PyBytes_Check(event_name_obj)) {
3765             event_name = PyBytes_AS_STRING(event_name_obj);
3766         }
3767         if (event_name == NULL) {
3768             Py_DECREF(events_seq);
3769             PyErr_Format(PyExc_ValueError, "invalid events sequence");
3770             return NULL;
3771         }
3772 
3773         Py_INCREF(event_name_obj);
3774         if (strcmp(event_name, "start") == 0) {
3775             Py_XSETREF(target->start_event_obj, event_name_obj);
3776         } else if (strcmp(event_name, "end") == 0) {
3777             Py_XSETREF(target->end_event_obj, event_name_obj);
3778         } else if (strcmp(event_name, "start-ns") == 0) {
3779             Py_XSETREF(target->start_ns_event_obj, event_name_obj);
3780             EXPAT(SetNamespaceDeclHandler)(
3781                 self->parser,
3782                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3783                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3784                 );
3785         } else if (strcmp(event_name, "end-ns") == 0) {
3786             Py_XSETREF(target->end_ns_event_obj, event_name_obj);
3787             EXPAT(SetNamespaceDeclHandler)(
3788                 self->parser,
3789                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3790                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3791                 );
3792         } else {
3793             Py_DECREF(event_name_obj);
3794             Py_DECREF(events_seq);
3795             PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
3796             return NULL;
3797         }
3798     }
3799 
3800     Py_DECREF(events_seq);
3801     Py_RETURN_NONE;
3802 }
3803 
3804 static PyObject*
xmlparser_getattro(XMLParserObject * self,PyObject * nameobj)3805 xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
3806 {
3807     if (PyUnicode_Check(nameobj)) {
3808         PyObject* res;
3809         if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
3810             res = self->entity;
3811         else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
3812             res = self->target;
3813         else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
3814             return PyUnicode_FromFormat(
3815                 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3816                 XML_MINOR_VERSION, XML_MICRO_VERSION);
3817         }
3818         else
3819             goto generic;
3820 
3821         if (!res && !_check_xmlparser(self)) {
3822              return NULL;
3823         }
3824         Py_INCREF(res);
3825         return res;
3826     }
3827   generic:
3828     return PyObject_GenericGetAttr((PyObject*) self, nameobj);
3829 }
3830 
3831 #include "clinic/_elementtree.c.h"
3832 
3833 static PyMethodDef element_methods[] = {
3834 
3835     _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3836 
3837     _ELEMENTTREE_ELEMENT_GET_METHODDEF
3838     _ELEMENTTREE_ELEMENT_SET_METHODDEF
3839 
3840     _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3841     _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3842     _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3843 
3844     _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3845     _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3846     _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3847     _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3848 
3849     _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3850     _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3851     _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3852 
3853     _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
3854     _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3855 
3856     _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3857     _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3858 
3859     _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3860 
3861     _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3862     _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3863     _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3864     _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3865     _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3866 
3867     {NULL, NULL}
3868 };
3869 
3870 static PyMappingMethods element_as_mapping = {
3871     (lenfunc) element_length,
3872     (binaryfunc) element_subscr,
3873     (objobjargproc) element_ass_subscr,
3874 };
3875 
3876 static PyGetSetDef element_getsetlist[] = {
3877     {"tag",
3878         (getter)element_tag_getter,
3879         (setter)element_tag_setter,
3880         "A string identifying what kind of data this element represents"},
3881     {"text",
3882         (getter)element_text_getter,
3883         (setter)element_text_setter,
3884         "A string of text directly after the start tag, or None"},
3885     {"tail",
3886         (getter)element_tail_getter,
3887         (setter)element_tail_setter,
3888         "A string of text directly after the end tag, or None"},
3889     {"attrib",
3890         (getter)element_attrib_getter,
3891         (setter)element_attrib_setter,
3892         "A dictionary containing the element's attributes"},
3893     {NULL},
3894 };
3895 
3896 static PyTypeObject Element_Type = {
3897     PyVarObject_HEAD_INIT(NULL, 0)
3898     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3899     /* methods */
3900     (destructor)element_dealloc,                    /* tp_dealloc */
3901     0,                                              /* tp_print */
3902     0,                                              /* tp_getattr */
3903     0,                                              /* tp_setattr */
3904     0,                                              /* tp_reserved */
3905     (reprfunc)element_repr,                         /* tp_repr */
3906     0,                                              /* tp_as_number */
3907     &element_as_sequence,                           /* tp_as_sequence */
3908     &element_as_mapping,                            /* tp_as_mapping */
3909     0,                                              /* tp_hash */
3910     0,                                              /* tp_call */
3911     0,                                              /* tp_str */
3912     PyObject_GenericGetAttr,                        /* tp_getattro */
3913     0,                                              /* tp_setattro */
3914     0,                                              /* tp_as_buffer */
3915     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3916                                                     /* tp_flags */
3917     0,                                              /* tp_doc */
3918     (traverseproc)element_gc_traverse,              /* tp_traverse */
3919     (inquiry)element_gc_clear,                      /* tp_clear */
3920     0,                                              /* tp_richcompare */
3921     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
3922     0,                                              /* tp_iter */
3923     0,                                              /* tp_iternext */
3924     element_methods,                                /* tp_methods */
3925     0,                                              /* tp_members */
3926     element_getsetlist,                             /* tp_getset */
3927     0,                                              /* tp_base */
3928     0,                                              /* tp_dict */
3929     0,                                              /* tp_descr_get */
3930     0,                                              /* tp_descr_set */
3931     0,                                              /* tp_dictoffset */
3932     (initproc)element_init,                         /* tp_init */
3933     PyType_GenericAlloc,                            /* tp_alloc */
3934     element_new,                                    /* tp_new */
3935     0,                                              /* tp_free */
3936 };
3937 
3938 static PyMethodDef treebuilder_methods[] = {
3939     _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3940     _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3941     _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3942     _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3943     {NULL, NULL}
3944 };
3945 
3946 static PyTypeObject TreeBuilder_Type = {
3947     PyVarObject_HEAD_INIT(NULL, 0)
3948     "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3949     /* methods */
3950     (destructor)treebuilder_dealloc,                /* tp_dealloc */
3951     0,                                              /* tp_print */
3952     0,                                              /* tp_getattr */
3953     0,                                              /* tp_setattr */
3954     0,                                              /* tp_reserved */
3955     0,                                              /* tp_repr */
3956     0,                                              /* tp_as_number */
3957     0,                                              /* tp_as_sequence */
3958     0,                                              /* tp_as_mapping */
3959     0,                                              /* tp_hash */
3960     0,                                              /* tp_call */
3961     0,                                              /* tp_str */
3962     0,                                              /* tp_getattro */
3963     0,                                              /* tp_setattro */
3964     0,                                              /* tp_as_buffer */
3965     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3966                                                     /* tp_flags */
3967     0,                                              /* tp_doc */
3968     (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
3969     (inquiry)treebuilder_gc_clear,                  /* tp_clear */
3970     0,                                              /* tp_richcompare */
3971     0,                                              /* tp_weaklistoffset */
3972     0,                                              /* tp_iter */
3973     0,                                              /* tp_iternext */
3974     treebuilder_methods,                            /* tp_methods */
3975     0,                                              /* tp_members */
3976     0,                                              /* tp_getset */
3977     0,                                              /* tp_base */
3978     0,                                              /* tp_dict */
3979     0,                                              /* tp_descr_get */
3980     0,                                              /* tp_descr_set */
3981     0,                                              /* tp_dictoffset */
3982     _elementtree_TreeBuilder___init__,              /* tp_init */
3983     PyType_GenericAlloc,                            /* tp_alloc */
3984     treebuilder_new,                                /* tp_new */
3985     0,                                              /* tp_free */
3986 };
3987 
3988 static PyMethodDef xmlparser_methods[] = {
3989     _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3990     _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3991     _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3992     _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3993     _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3994     {NULL, NULL}
3995 };
3996 
3997 static PyTypeObject XMLParser_Type = {
3998     PyVarObject_HEAD_INIT(NULL, 0)
3999     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4000     /* methods */
4001     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4002     0,                                              /* tp_print */
4003     0,                                              /* tp_getattr */
4004     0,                                              /* tp_setattr */
4005     0,                                              /* tp_reserved */
4006     0,                                              /* tp_repr */
4007     0,                                              /* tp_as_number */
4008     0,                                              /* tp_as_sequence */
4009     0,                                              /* tp_as_mapping */
4010     0,                                              /* tp_hash */
4011     0,                                              /* tp_call */
4012     0,                                              /* tp_str */
4013     (getattrofunc)xmlparser_getattro,               /* tp_getattro */
4014     0,                                              /* tp_setattro */
4015     0,                                              /* tp_as_buffer */
4016     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4017                                                     /* tp_flags */
4018     0,                                              /* tp_doc */
4019     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4020     (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4021     0,                                              /* tp_richcompare */
4022     0,                                              /* tp_weaklistoffset */
4023     0,                                              /* tp_iter */
4024     0,                                              /* tp_iternext */
4025     xmlparser_methods,                              /* tp_methods */
4026     0,                                              /* tp_members */
4027     0,                                              /* tp_getset */
4028     0,                                              /* tp_base */
4029     0,                                              /* tp_dict */
4030     0,                                              /* tp_descr_get */
4031     0,                                              /* tp_descr_set */
4032     0,                                              /* tp_dictoffset */
4033     _elementtree_XMLParser___init__,                /* tp_init */
4034     PyType_GenericAlloc,                            /* tp_alloc */
4035     xmlparser_new,                                  /* tp_new */
4036     0,                                              /* tp_free */
4037 };
4038 
4039 /* ==================================================================== */
4040 /* python module interface */
4041 
4042 static PyMethodDef _functions[] = {
4043     {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
4044     {NULL, NULL}
4045 };
4046 
4047 
4048 static struct PyModuleDef elementtreemodule = {
4049     PyModuleDef_HEAD_INIT,
4050     "_elementtree",
4051     NULL,
4052     sizeof(elementtreestate),
4053     _functions,
4054     NULL,
4055     elementtree_traverse,
4056     elementtree_clear,
4057     elementtree_free
4058 };
4059 
4060 PyMODINIT_FUNC
PyInit__elementtree(void)4061 PyInit__elementtree(void)
4062 {
4063     PyObject *m, *temp;
4064     elementtreestate *st;
4065 
4066     m = PyState_FindModule(&elementtreemodule);
4067     if (m) {
4068         Py_INCREF(m);
4069         return m;
4070     }
4071 
4072     /* Initialize object types */
4073     if (PyType_Ready(&ElementIter_Type) < 0)
4074         return NULL;
4075     if (PyType_Ready(&TreeBuilder_Type) < 0)
4076         return NULL;
4077     if (PyType_Ready(&Element_Type) < 0)
4078         return NULL;
4079     if (PyType_Ready(&XMLParser_Type) < 0)
4080         return NULL;
4081 
4082     m = PyModule_Create(&elementtreemodule);
4083     if (!m)
4084         return NULL;
4085     st = ET_STATE(m);
4086 
4087     if (!(temp = PyImport_ImportModule("copy")))
4088         return NULL;
4089     st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4090     Py_XDECREF(temp);
4091 
4092     if (st->deepcopy_obj == NULL) {
4093         return NULL;
4094     }
4095 
4096     assert(!PyErr_Occurred());
4097     if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4098         return NULL;
4099 
4100     /* link against pyexpat */
4101     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4102     if (expat_capi) {
4103         /* check that it's usable */
4104         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4105             (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4106             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4107             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4108             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4109             PyErr_SetString(PyExc_ImportError,
4110                             "pyexpat version is incompatible");
4111             return NULL;
4112         }
4113     } else {
4114         return NULL;
4115     }
4116 
4117     st->parseerror_obj = PyErr_NewException(
4118         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4119         );
4120     Py_INCREF(st->parseerror_obj);
4121     PyModule_AddObject(m, "ParseError", st->parseerror_obj);
4122 
4123     Py_INCREF((PyObject *)&Element_Type);
4124     PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4125 
4126     Py_INCREF((PyObject *)&TreeBuilder_Type);
4127     PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4128 
4129     Py_INCREF((PyObject *)&XMLParser_Type);
4130     PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
4131 
4132     return m;
4133 }
4134