1 /*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * info@pythonware.com
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14 #define PY_SSIZE_T_CLEAN
15
16 #include "Python.h"
17 #include "structmember.h"
18
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21
/* An element can hold this many children without extra memory
   allocations.  This is the length of the inline _children array
   embedded in ElementObjectExtra. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
27 have no more than the given number of children. Set this to zero
28 to minimize the size of the element structure itself (this only
29 helps if you have lots of leaf nodes with attributes). */
30
31 /* Also note that pymalloc always allocates blocks in multiples of
32 eight bytes. For the current C version of ElementTree, this means
33 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36 /* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in 'memory' and print it together with
   the given comment.  In the default configuration both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local (static) function
   returning 'type'.  Under MSVC it additionally asks for inlining and the
   __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   The flag lives in the lowest bit of the pointer value:
     JOIN_GET(p)       - the flag (0 or 1) stored in p
     JOIN_SET(p, flag) - p with its flag bit cleared, then or'ed with flag
     JOIN_OBJ(p)       - p with the flag bit cleared, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
63
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71 }
72
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78 if (*p) {
79 _set_joined_ptr(p, NULL);
80 }
81 }
82
/* Types defined by this extension.  They are declared up front so that the
   code below can take their addresses (e.g. &Element_Type in the
   Element_Check macros and in PyObject_GC_New). */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
 *
 * Every member is a reference held by the module (or NULL): all of them are
 * released by elementtree_clear() and reported to the garbage collector by
 * elementtree_traverse().
 */
typedef struct {
    PyObject *parseerror_obj;
    /* callable invoked as deepcopy_obj(object, memo) by deepcopy() when no
       fast path applies */
    PyObject *deepcopy_obj;
    PyObject *elementpath_obj;
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
98
/* Declaration of the module definition; ET_STATE_GLOBAL below needs its
 * address to look the module up.
 */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112 static int
elementtree_clear(PyObject * m)113 elementtree_clear(PyObject *m)
114 {
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
121 return 0;
122 }
123
124 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)125 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126 {
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
133 return 0;
134 }
135
136 static void
elementtree_free(void * m)137 elementtree_free(void *m)
138 {
139 elementtree_clear((PyObject *)m);
140 }
141
142 /* helpers */
143
144 LOCAL(PyObject*)
list_join(PyObject * list)145 list_join(PyObject* list)
146 {
147 /* join list elements */
148 PyObject* joiner;
149 PyObject* result;
150
151 joiner = PyUnicode_FromStringAndSize("", 0);
152 if (!joiner)
153 return NULL;
154 result = PyUnicode_Join(joiner, list);
155 Py_DECREF(joiner);
156 return result;
157 }
158
159 /* Is the given object an empty dictionary?
160 */
161 static int
is_empty_dict(PyObject * obj)162 is_empty_dict(PyObject *obj)
163 {
164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
165 }
166
167
168 /* -------------------------------------------------------------------- */
169 /* the Element type */
170
171 typedef struct {
172
173 /* attributes (a dictionary object), or None if no attributes */
174 PyObject* attrib;
175
176 /* child elements */
177 Py_ssize_t length; /* actual number of items */
178 Py_ssize_t allocated; /* allocated items */
179
180 /* this either points to _children or to a malloced buffer */
181 PyObject* *children;
182
183 PyObject* _children[STATIC_CHILDREN];
184
185 } ElementObjectExtra;
186
187 typedef struct {
188 PyObject_HEAD
189
190 /* element tag (a string). */
191 PyObject* tag;
192
193 /* text before first child. note that this is a tagged pointer;
194 use JOIN_OBJ to get the object pointer. the join flag is used
195 to distinguish lists created by the tree builder from lists
196 assigned to the attribute by application code; the former
197 should be joined before being returned to the user, the latter
198 should be left intact. */
199 PyObject* text;
200
201 /* text after this element, in parent. note that this is a tagged
202 pointer; use JOIN_OBJ to get the object pointer. */
203 PyObject* tail;
204
205 ElementObjectExtra* extra;
206
207 PyObject *weakreflist; /* For tp_weaklistoffset */
208
209 } ElementObject;
210
211
/* Element_CheckExact: true only when op's type is exactly Element_Type.
   Element_Check: also accepts instances of subtypes (PyObject_TypeCheck). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
215
216 /* -------------------------------------------------------------------- */
217 /* Element constructors and destructor */
218
219 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)220 create_extra(ElementObject* self, PyObject* attrib)
221 {
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
223 if (!self->extra) {
224 PyErr_NoMemory();
225 return -1;
226 }
227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239 }
240
241 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)242 dealloc_extra(ElementObjectExtra *extra)
243 {
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258 }
259
260 LOCAL(void)
clear_extra(ElementObject * self)261 clear_extra(ElementObject* self)
262 {
263 ElementObjectExtra *myextra;
264
265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
270 myextra = self->extra;
271 self->extra = NULL;
272
273 dealloc_extra(myextra);
274 }
275
276 /* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278 */
279 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)280 create_new_element(PyObject* tag, PyObject* attrib)
281 {
282 ElementObject* self;
283
284 self = PyObject_GC_New(ElementObject, &Element_Type);
285 if (self == NULL)
286 return NULL;
287 self->extra = NULL;
288
289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
298 self->weakreflist = NULL;
299
300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
305 Py_DECREF(self);
306 return NULL;
307 }
308 }
309
310 return (PyObject*) self;
311 }
312
313 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)314 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315 {
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
328 e->weakreflist = NULL;
329 }
330 return (PyObject *)e;
331 }
332
333 /* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
335 * either accept attrib as a keyword argument or all attributes splashed
336 * directly into *kwds.
337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
340 */
341 static PyObject*
get_attrib_from_keywords(PyObject * kwds)342 get_attrib_from_keywords(PyObject *kwds)
343 {
344 PyObject *attrib_str = PyUnicode_FromString("attrib");
345 if (attrib_str == NULL) {
346 return NULL;
347 }
348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
355 Py_DECREF(attrib_str);
356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
365 }
366 else if (!PyErr_Occurred()) {
367 attrib = PyDict_New();
368 }
369
370 Py_DECREF(attrib_str);
371
372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
376 return attrib;
377 }
378
379 /*[clinic input]
380 module _elementtree
381 class _elementtree.Element "ElementObject *" "&Element_Type"
382 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384 [clinic start generated code]*/
385 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
387 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)388 element_init(PyObject *self, PyObject *args, PyObject *kwds)
389 {
390 PyObject *tag;
391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
400 if (!attrib)
401 return -1;
402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
404 Py_DECREF(attrib);
405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
413 }
414
415 self_elem = (ElementObject *)self;
416
417 if (attrib != NULL && !is_empty_dict(attrib)) {
418 if (create_extra(self_elem, attrib) < 0) {
419 Py_DECREF(attrib);
420 return -1;
421 }
422 }
423
424 /* We own a reference to attrib here and it's no longer needed. */
425 Py_XDECREF(attrib);
426
427 /* Replace the objects already pointed to by tag, text and tail. */
428 Py_INCREF(tag);
429 Py_XSETREF(self_elem->tag, tag);
430
431 Py_INCREF(Py_None);
432 _set_joined_ptr(&self_elem->text, Py_None);
433
434 Py_INCREF(Py_None);
435 _set_joined_ptr(&self_elem->tail, Py_None);
436
437 return 0;
438 }
439
440 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)441 element_resize(ElementObject* self, Py_ssize_t extra)
442 {
443 Py_ssize_t size;
444 PyObject* *children;
445
446 assert(extra >= 0);
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
454
455 size = self->extra->length + extra; /* never overflows */
456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
463 * be safe.
464 */
465 size = size ? size : 1;
466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
468 if (self->extra->children != self->extra->_children) {
469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
470 * "children", which needs at least 4 bytes. Although it's a
471 * false alarm always assume at least one child to be safe.
472 */
473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494 }
495
496 LOCAL(void)
raise_type_error(PyObject * element)497 raise_type_error(PyObject *element)
498 {
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502 }
503
504 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)505 element_add_subelement(ElementObject* self, PyObject* element)
506 {
507 /* add a child element to a parent */
508
509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523 }
524
525 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)526 element_get_attrib(ElementObject* self)
527 {
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
538 Py_DECREF(Py_None);
539 self->extra->attrib = res;
540 }
541
542 return res;
543 }
544
545 LOCAL(PyObject*)
element_get_text(ElementObject * self)546 element_get_text(ElementObject* self)
547 {
548 /* return borrowed reference to text attribute */
549
550 PyObject *res = self->text;
551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
555 PyObject *tmp = list_join(res);
556 if (!tmp)
557 return NULL;
558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
561 }
562 }
563
564 return res;
565 }
566
567 LOCAL(PyObject*)
element_get_tail(ElementObject * self)568 element_get_tail(ElementObject* self)
569 {
570 /* return borrowed reference to text attribute */
571
572 PyObject *res = self->tail;
573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
577 PyObject *tmp = list_join(res);
578 if (!tmp)
579 return NULL;
580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
583 }
584 }
585
586 return res;
587 }
588
589 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)590 subelement(PyObject *self, PyObject *args, PyObject *kwds)
591 {
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
599 &PyDict_Type, &attrib)) {
600 return NULL;
601 }
602
603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
606 if (!attrib)
607 return NULL;
608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
617 } else {
618 /* no attrib arg, no kwds, so no attribute */
619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
623 elem = create_new_element(tag, attrib);
624 Py_DECREF(attrib);
625 if (elem == NULL)
626 return NULL;
627
628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
630 return NULL;
631 }
632
633 return elem;
634 }
635
636 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)637 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638 {
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
644 Py_ssize_t i;
645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651 }
652
/* Drop every reference held by 'self': the tag, the (possibly tagged) text
 * and tail pointers, and the whole extra block.  Afterwards tag, text, tail
 * and extra are all NULL.  Always returns 0.
 */
static int
element_gc_clear(ElementObject *self)
{
    Py_CLEAR(self->tag);
    _clear_joined_ptr(&self->text);
    _clear_joined_ptr(&self->tail);

    /* After dropping all references from extra, it's no longer valid anyway,
     * so fully deallocate it.
     */
    clear_extra(self);
    return 0;
}
666
/* Destructor for Element objects: untrack the object from the GC, clear
 * any weak references to it, release everything it owns and hand the
 * memory back through tp_free.  The work after untracking is bracketed by
 * the Py_TRASHCAN_BEGIN / Py_TRASHCAN_END macros.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_BEGIN(self, element_dealloc)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_END
}
685
686 /* -------------------------------------------------------------------- */
687
688 /*[clinic input]
689 _elementtree.Element.append
690
691 subelement: object(subclass_of='&Element_Type')
692 /
693
694 [clinic start generated code]*/
695
696 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)697 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699 {
700 if (element_add_subelement(self, subelement) < 0)
701 return NULL;
702
703 Py_RETURN_NONE;
704 }
705
706 /*[clinic input]
707 _elementtree.Element.clear
708
709 [clinic start generated code]*/
710
711 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)712 _elementtree_Element_clear_impl(ElementObject *self)
713 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714 {
715 clear_extra(self);
716
717 Py_INCREF(Py_None);
718 _set_joined_ptr(&self->text, Py_None);
719
720 Py_INCREF(Py_None);
721 _set_joined_ptr(&self->tail, Py_None);
722
723 Py_RETURN_NONE;
724 }
725
726 /*[clinic input]
727 _elementtree.Element.__copy__
728
729 [clinic start generated code]*/
730
731 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)732 _elementtree_Element___copy___impl(ElementObject *self)
733 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
734 {
735 Py_ssize_t i;
736 ElementObject* element;
737
738 element = (ElementObject*) create_new_element(
739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
740 if (!element)
741 return NULL;
742
743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
745
746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
748
749 assert(!element->extra || !element->extra->length);
750 if (self->extra) {
751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
753 return NULL;
754 }
755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
761 assert(!element->extra->length);
762 element->extra->length = self->extra->length;
763 }
764
765 return (PyObject*) element;
766 }
767
/* Helper for a deep copy.  Declared ahead of its definition because
   _elementtree_Element___deepcopy___impl below calls it, and it calls that
   function back for exact Element instances. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
771 /*[clinic input]
772 _elementtree.Element.__deepcopy__
773
774 memo: object(subclass_of="&PyDict_Type")
775 /
776
777 [clinic start generated code]*/
778
779 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)780 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
782 {
783 Py_ssize_t i;
784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
806 element = (ElementObject*) create_new_element(tag, attrib);
807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
813
814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
823
824 assert(!element->extra || !element->extra->length);
825 if (self->extra) {
826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
842 assert(!element->extra->length);
843 element->extra->length = self->extra->length;
844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
847 id = PyLong_FromSsize_t((uintptr_t) self);
848 if (!id)
849 goto error;
850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863 }
864
865 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)866 deepcopy(PyObject *object, PyObject *memo)
867 {
868 /* do a deep copy of the given object */
869 elementtreestate *st;
870 PyObject *stack[2];
871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
907 stack[0] = object;
908 stack[1] = memo;
909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
910 }
911
912
913 /*[clinic input]
914 _elementtree.Element.__sizeof__ -> Py_ssize_t
915
916 [clinic start generated code]*/
917
918 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)919 _elementtree_Element___sizeof___impl(ElementObject *self)
920 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
921 {
922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
928 return result;
929 }
930
/* dict keys for getstate/setstate: __getstate__ builds a dict with exactly
   these keys, and __setstate__ accepts them as keyword names. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
937
938 /* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
944 /*[clinic input]
945 _elementtree.Element.__getstate__
946
947 [clinic start generated code]*/
948
949 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)950 _elementtree_Element___getstate___impl(ElementObject *self)
951 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
952 {
953 Py_ssize_t i;
954 PyObject *children, *attrib;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 if (self->extra && self->extra->attrib != Py_None) {
967 attrib = self->extra->attrib;
968 Py_INCREF(attrib);
969 }
970 else {
971 attrib = PyDict_New();
972 if (!attrib) {
973 Py_DECREF(children);
974 return NULL;
975 }
976 }
977
978 return Py_BuildValue("{sOsNsNsOsO}",
979 PICKLED_TAG, self->tag,
980 PICKLED_CHILDREN, children,
981 PICKLED_ATTRIB, attrib,
982 PICKLED_TEXT, JOIN_OBJ(self->text),
983 PICKLED_TAIL, JOIN_OBJ(self->tail));
984 }
985
/* Restore the state of 'self' from individual attribute values.
 *
 * 'tag' is required; 'attrib', 'text', 'tail' and 'children' may each be
 * NULL.  A missing text/tail becomes None; an exact list given as text or
 * tail is stored with the join flag set.  'children', when given, must be a
 * list whose items are all Elements; it replaces the current children.
 * 'attrib', when given, replaces the current attribute object.
 *
 * Returns None (new reference) on success, NULL with an exception set on
 * failure.  Note that tag, text and tail have already been replaced by the
 * time a later step fails.
 */
static PyObject *
element_setstate_from_attributes(ElementObject *self,
                                 PyObject *tag,
                                 PyObject *attrib,
                                 PyObject *text,
                                 PyObject *tail,
                                 PyObject *children)
{
    Py_ssize_t i, nchildren;
    ElementObjectExtra *oldextra = NULL;

    if (!tag) {
        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
        return NULL;
    }

    Py_INCREF(tag);
    Py_XSETREF(self->tag, tag);

    /* The reference is taken on the untagged object; the tagged pointer is
       what gets stored. */
    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
    Py_INCREF(JOIN_OBJ(text));
    _set_joined_ptr(&self->text, text);

    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
    Py_INCREF(JOIN_OBJ(tail));
    _set_joined_ptr(&self->tail, tail);

    /* Handle ATTRIB and CHILDREN. */
    if (!children && !attrib) {
        Py_RETURN_NONE;
    }

    /* Compute 'nchildren'. */
    if (children) {
        if (!PyList_Check(children)) {
            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
            return NULL;
        }
        nchildren = PyList_GET_SIZE(children);

        /* (Re-)allocate 'extra'.
           Avoid DECREFs calling into this code again (cycles, etc.)
         */
        oldextra = self->extra;
        self->extra = NULL;
        if (element_resize(self, nchildren)) {
            /* Could not build the new block: drop whatever was created and
               put the old block back. */
            assert(!self->extra || !self->extra->length);
            clear_extra(self);
            self->extra = oldextra;
            return NULL;
        }
        assert(self->extra);
        assert(self->extra->allocated >= nchildren);
        if (oldextra) {
            /* Swap the attribute objects: the new block takes over the old
               attributes, the old block receives the new block's None, so
               each block still owns exactly one reference. */
            assert(self->extra->attrib == Py_None);
            self->extra->attrib = oldextra->attrib;
            oldextra->attrib = Py_None;
        }

        /* Copy children */
        for (i = 0; i < nchildren; i++) {
            PyObject *child = PyList_GET_ITEM(children, i);
            if (!Element_Check(child)) {
                raise_type_error(child);
                /* Record how many children were stored so that they are
                   released together with the new block later on. */
                self->extra->length = i;
                dealloc_extra(oldextra);
                return NULL;
            }
            Py_INCREF(child);
            self->extra->children[i] = child;
        }

        assert(!self->extra->length);
        self->extra->length = nchildren;
    }
    else {
        /* No children given: only make sure an extra block exists so that
           attrib can be stored below. */
        if (element_resize(self, 0)) {
            return NULL;
        }
    }

    /* Stash attrib. */
    if (attrib) {
        Py_INCREF(attrib);
        Py_XSETREF(self->extra->attrib, attrib);
    }
    /* oldextra is NULL unless children were replaced; dealloc_extra accepts
       NULL. */
    dealloc_extra(oldextra);

    Py_RETURN_NONE;
}
1076
1077 /* __setstate__ for Element instance from the Python implementation.
1078 * 'state' should be the instance dict.
1079 */
1080
1081 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1082 element_setstate_from_Python(ElementObject *self, PyObject *state)
1083 {
1084 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1085 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1086 PyObject *args;
1087 PyObject *tag, *attrib, *text, *tail, *children;
1088 PyObject *retval;
1089
1090 tag = attrib = text = tail = children = NULL;
1091 args = PyTuple_New(0);
1092 if (!args)
1093 return NULL;
1094
1095 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1096 &attrib, &text, &tail, &children))
1097 retval = element_setstate_from_attributes(self, tag, attrib, text,
1098 tail, children);
1099 else
1100 retval = NULL;
1101
1102 Py_DECREF(args);
1103 return retval;
1104 }
1105
1106 /*[clinic input]
1107 _elementtree.Element.__setstate__
1108
1109 state: object
1110 /
1111
1112 [clinic start generated code]*/
1113
1114 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1115 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1116 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1117 {
1118 if (!PyDict_CheckExact(state)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "Don't know how to unpickle \"%.200R\" as an Element",
1121 state);
1122 return NULL;
1123 }
1124 else
1125 return element_setstate_from_Python(self, state);
1126 }
1127
1128 LOCAL(int)
checkpath(PyObject * tag)1129 checkpath(PyObject* tag)
1130 {
1131 Py_ssize_t i;
1132 int check = 1;
1133
1134 /* check if a tag contains an xpath character */
1135
1136 #define PATHCHAR(ch) \
1137 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1138
1139 if (PyUnicode_Check(tag)) {
1140 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1141 void *data = PyUnicode_DATA(tag);
1142 unsigned int kind = PyUnicode_KIND(tag);
1143 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1144 PyUnicode_READ(kind, data, 1) == '}' || (
1145 PyUnicode_READ(kind, data, 1) == '*' &&
1146 PyUnicode_READ(kind, data, 2) == '}'))) {
1147 /* wildcard: '{}tag' or '{*}tag' */
1148 return 1;
1149 }
1150 for (i = 0; i < len; i++) {
1151 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1152 if (ch == '{')
1153 check = 0;
1154 else if (ch == '}')
1155 check = 1;
1156 else if (check && PATHCHAR(ch))
1157 return 1;
1158 }
1159 return 0;
1160 }
1161 if (PyBytes_Check(tag)) {
1162 char *p = PyBytes_AS_STRING(tag);
1163 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1164 if (len >= 3 && p[0] == '{' && (
1165 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1166 /* wildcard: '{}tag' or '{*}tag' */
1167 return 1;
1168 }
1169 for (i = 0; i < len; i++) {
1170 if (p[i] == '{')
1171 check = 0;
1172 else if (p[i] == '}')
1173 check = 1;
1174 else if (check && PATHCHAR(p[i]))
1175 return 1;
1176 }
1177 return 0;
1178 }
1179
1180 return 1; /* unknown type; might be path expression */
1181 }
1182
1183 /*[clinic input]
1184 _elementtree.Element.extend
1185
1186 elements: object
1187 /
1188
1189 [clinic start generated code]*/
1190
1191 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1192 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1193 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1194 {
1195 PyObject* seq;
1196 Py_ssize_t i;
1197
1198 seq = PySequence_Fast(elements, "");
1199 if (!seq) {
1200 PyErr_Format(
1201 PyExc_TypeError,
1202 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1203 );
1204 return NULL;
1205 }
1206
1207 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1208 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1209 Py_INCREF(element);
1210 if (element_add_subelement(self, element) < 0) {
1211 Py_DECREF(seq);
1212 Py_DECREF(element);
1213 return NULL;
1214 }
1215 Py_DECREF(element);
1216 }
1217
1218 Py_DECREF(seq);
1219
1220 Py_RETURN_NONE;
1221 }
1222
1223 /*[clinic input]
1224 _elementtree.Element.find
1225
1226 path: object
1227 namespaces: object = None
1228
1229 [clinic start generated code]*/
1230
1231 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1232 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1233 PyObject *namespaces)
1234 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1235 {
1236 Py_ssize_t i;
1237 elementtreestate *st = ET_STATE_GLOBAL;
1238
1239 if (checkpath(path) || namespaces != Py_None) {
1240 _Py_IDENTIFIER(find);
1241 return _PyObject_CallMethodIdObjArgs(
1242 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1243 );
1244 }
1245
1246 if (!self->extra)
1247 Py_RETURN_NONE;
1248
1249 for (i = 0; i < self->extra->length; i++) {
1250 PyObject* item = self->extra->children[i];
1251 int rc;
1252 assert(Element_Check(item));
1253 Py_INCREF(item);
1254 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1255 if (rc > 0)
1256 return item;
1257 Py_DECREF(item);
1258 if (rc < 0)
1259 return NULL;
1260 }
1261
1262 Py_RETURN_NONE;
1263 }
1264
1265 /*[clinic input]
1266 _elementtree.Element.findtext
1267
1268 path: object
1269 default: object = None
1270 namespaces: object = None
1271
1272 [clinic start generated code]*/
1273
1274 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1275 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1276 PyObject *default_value,
1277 PyObject *namespaces)
1278 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1279 {
1280 Py_ssize_t i;
1281 _Py_IDENTIFIER(findtext);
1282 elementtreestate *st = ET_STATE_GLOBAL;
1283
1284 if (checkpath(path) || namespaces != Py_None)
1285 return _PyObject_CallMethodIdObjArgs(
1286 st->elementpath_obj, &PyId_findtext,
1287 self, path, default_value, namespaces, NULL
1288 );
1289
1290 if (!self->extra) {
1291 Py_INCREF(default_value);
1292 return default_value;
1293 }
1294
1295 for (i = 0; i < self->extra->length; i++) {
1296 PyObject *item = self->extra->children[i];
1297 int rc;
1298 assert(Element_Check(item));
1299 Py_INCREF(item);
1300 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1301 if (rc > 0) {
1302 PyObject* text = element_get_text((ElementObject*)item);
1303 if (text == Py_None) {
1304 Py_DECREF(item);
1305 return PyUnicode_New(0, 0);
1306 }
1307 Py_XINCREF(text);
1308 Py_DECREF(item);
1309 return text;
1310 }
1311 Py_DECREF(item);
1312 if (rc < 0)
1313 return NULL;
1314 }
1315
1316 Py_INCREF(default_value);
1317 return default_value;
1318 }
1319
1320 /*[clinic input]
1321 _elementtree.Element.findall
1322
1323 path: object
1324 namespaces: object = None
1325
1326 [clinic start generated code]*/
1327
1328 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1329 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1330 PyObject *namespaces)
1331 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1332 {
1333 Py_ssize_t i;
1334 PyObject* out;
1335 elementtreestate *st = ET_STATE_GLOBAL;
1336
1337 if (checkpath(path) || namespaces != Py_None) {
1338 _Py_IDENTIFIER(findall);
1339 return _PyObject_CallMethodIdObjArgs(
1340 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1341 );
1342 }
1343
1344 out = PyList_New(0);
1345 if (!out)
1346 return NULL;
1347
1348 if (!self->extra)
1349 return out;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 int rc;
1354 assert(Element_Check(item));
1355 Py_INCREF(item);
1356 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1357 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1358 Py_DECREF(item);
1359 Py_DECREF(out);
1360 return NULL;
1361 }
1362 Py_DECREF(item);
1363 }
1364
1365 return out;
1366 }
1367
1368 /*[clinic input]
1369 _elementtree.Element.iterfind
1370
1371 path: object
1372 namespaces: object = None
1373
1374 [clinic start generated code]*/
1375
1376 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1377 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1378 PyObject *namespaces)
1379 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1380 {
1381 PyObject* tag = path;
1382 _Py_IDENTIFIER(iterfind);
1383 elementtreestate *st = ET_STATE_GLOBAL;
1384
1385 return _PyObject_CallMethodIdObjArgs(
1386 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1387 }
1388
1389 /*[clinic input]
1390 _elementtree.Element.get
1391
1392 key: object
1393 default: object = None
1394
1395 [clinic start generated code]*/
1396
1397 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1398 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1399 PyObject *default_value)
1400 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1401 {
1402 PyObject* value;
1403
1404 if (!self->extra || self->extra->attrib == Py_None)
1405 value = default_value;
1406 else {
1407 value = PyDict_GetItemWithError(self->extra->attrib, key);
1408 if (!value) {
1409 if (PyErr_Occurred()) {
1410 return NULL;
1411 }
1412 value = default_value;
1413 }
1414 }
1415
1416 Py_INCREF(value);
1417 return value;
1418 }
1419
1420 /*[clinic input]
1421 _elementtree.Element.getchildren
1422
1423 [clinic start generated code]*/
1424
1425 static PyObject *
_elementtree_Element_getchildren_impl(ElementObject * self)1426 _elementtree_Element_getchildren_impl(ElementObject *self)
1427 /*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
1428 {
1429 Py_ssize_t i;
1430 PyObject* list;
1431
1432 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1433 "This method will be removed in future versions. "
1434 "Use 'list(elem)' or iteration over elem instead.",
1435 1) < 0) {
1436 return NULL;
1437 }
1438
1439 if (!self->extra)
1440 return PyList_New(0);
1441
1442 list = PyList_New(self->extra->length);
1443 if (!list)
1444 return NULL;
1445
1446 for (i = 0; i < self->extra->length; i++) {
1447 PyObject* item = self->extra->children[i];
1448 Py_INCREF(item);
1449 PyList_SET_ITEM(list, i, item);
1450 }
1451
1452 return list;
1453 }
1454
1455
/* Forward declaration: the iterator constructor is defined after the
 * iterator type further down, but Element.iter() and Element.itertext()
 * below already call it.  'tag' is the tag to look for (Py_None for no
 * filtering); a non-zero 'gettext' requests text iteration. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1458
1459
1460 /*[clinic input]
1461 _elementtree.Element.iter
1462
1463 tag: object = None
1464
1465 [clinic start generated code]*/
1466
1467 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1468 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1469 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1470 {
1471 if (PyUnicode_Check(tag)) {
1472 if (PyUnicode_READY(tag) < 0)
1473 return NULL;
1474 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1475 tag = Py_None;
1476 }
1477 else if (PyBytes_Check(tag)) {
1478 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1479 tag = Py_None;
1480 }
1481
1482 return create_elementiter(self, tag, 0);
1483 }
1484
1485
1486 /*[clinic input]
1487 _elementtree.Element.getiterator
1488
1489 tag: object = None
1490
1491 [clinic start generated code]*/
1492
1493 static PyObject *
_elementtree_Element_getiterator_impl(ElementObject * self,PyObject * tag)1494 _elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1495 /*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1496 {
1497 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1498 "This method will be removed in future versions. "
1499 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1500 1) < 0) {
1501 return NULL;
1502 }
1503 return _elementtree_Element_iter_impl(self, tag);
1504 }
1505
1506
1507 /*[clinic input]
1508 _elementtree.Element.itertext
1509
1510 [clinic start generated code]*/
1511
1512 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1513 _elementtree_Element_itertext_impl(ElementObject *self)
1514 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1515 {
1516 return create_elementiter(self, Py_None, 1);
1517 }
1518
1519
1520 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1521 element_getitem(PyObject* self_, Py_ssize_t index)
1522 {
1523 ElementObject* self = (ElementObject*) self_;
1524
1525 if (!self->extra || index < 0 || index >= self->extra->length) {
1526 PyErr_SetString(
1527 PyExc_IndexError,
1528 "child index out of range"
1529 );
1530 return NULL;
1531 }
1532
1533 Py_INCREF(self->extra->children[index]);
1534 return self->extra->children[index];
1535 }
1536
1537 /*[clinic input]
1538 _elementtree.Element.insert
1539
1540 index: Py_ssize_t
1541 subelement: object(subclass_of='&Element_Type')
1542 /
1543
1544 [clinic start generated code]*/
1545
1546 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1547 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1548 PyObject *subelement)
1549 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1550 {
1551 Py_ssize_t i;
1552
1553 if (!self->extra) {
1554 if (create_extra(self, NULL) < 0)
1555 return NULL;
1556 }
1557
1558 if (index < 0) {
1559 index += self->extra->length;
1560 if (index < 0)
1561 index = 0;
1562 }
1563 if (index > self->extra->length)
1564 index = self->extra->length;
1565
1566 if (element_resize(self, 1) < 0)
1567 return NULL;
1568
1569 for (i = self->extra->length; i > index; i--)
1570 self->extra->children[i] = self->extra->children[i-1];
1571
1572 Py_INCREF(subelement);
1573 self->extra->children[index] = subelement;
1574
1575 self->extra->length++;
1576
1577 Py_RETURN_NONE;
1578 }
1579
1580 /*[clinic input]
1581 _elementtree.Element.items
1582
1583 [clinic start generated code]*/
1584
1585 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1586 _elementtree_Element_items_impl(ElementObject *self)
1587 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1588 {
1589 if (!self->extra || self->extra->attrib == Py_None)
1590 return PyList_New(0);
1591
1592 return PyDict_Items(self->extra->attrib);
1593 }
1594
1595 /*[clinic input]
1596 _elementtree.Element.keys
1597
1598 [clinic start generated code]*/
1599
1600 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1601 _elementtree_Element_keys_impl(ElementObject *self)
1602 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1603 {
1604 if (!self->extra || self->extra->attrib == Py_None)
1605 return PyList_New(0);
1606
1607 return PyDict_Keys(self->extra->attrib);
1608 }
1609
1610 static Py_ssize_t
element_length(ElementObject * self)1611 element_length(ElementObject* self)
1612 {
1613 if (!self->extra)
1614 return 0;
1615
1616 return self->extra->length;
1617 }
1618
1619 /*[clinic input]
1620 _elementtree.Element.makeelement
1621
1622 tag: object
1623 attrib: object
1624 /
1625
1626 [clinic start generated code]*/
1627
1628 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1629 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1630 PyObject *attrib)
1631 /*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
1632 {
1633 PyObject* elem;
1634
1635 attrib = PyDict_Copy(attrib);
1636 if (!attrib)
1637 return NULL;
1638
1639 elem = create_new_element(tag, attrib);
1640
1641 Py_DECREF(attrib);
1642
1643 return elem;
1644 }
1645
1646 /*[clinic input]
1647 _elementtree.Element.remove
1648
1649 subelement: object(subclass_of='&Element_Type')
1650 /
1651
1652 [clinic start generated code]*/
1653
1654 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1655 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1656 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1657 {
1658 Py_ssize_t i;
1659 int rc;
1660 PyObject *found;
1661
1662 if (!self->extra) {
1663 /* element has no children, so raise exception */
1664 PyErr_SetString(
1665 PyExc_ValueError,
1666 "list.remove(x): x not in list"
1667 );
1668 return NULL;
1669 }
1670
1671 for (i = 0; i < self->extra->length; i++) {
1672 if (self->extra->children[i] == subelement)
1673 break;
1674 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1675 if (rc > 0)
1676 break;
1677 if (rc < 0)
1678 return NULL;
1679 }
1680
1681 if (i >= self->extra->length) {
1682 /* subelement is not in children, so raise exception */
1683 PyErr_SetString(
1684 PyExc_ValueError,
1685 "list.remove(x): x not in list"
1686 );
1687 return NULL;
1688 }
1689
1690 found = self->extra->children[i];
1691
1692 self->extra->length--;
1693 for (; i < self->extra->length; i++)
1694 self->extra->children[i] = self->extra->children[i+1];
1695
1696 Py_DECREF(found);
1697 Py_RETURN_NONE;
1698 }
1699
1700 static PyObject*
element_repr(ElementObject * self)1701 element_repr(ElementObject* self)
1702 {
1703 int status;
1704
1705 if (self->tag == NULL)
1706 return PyUnicode_FromFormat("<Element at %p>", self);
1707
1708 status = Py_ReprEnter((PyObject *)self);
1709 if (status == 0) {
1710 PyObject *res;
1711 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1712 Py_ReprLeave((PyObject *)self);
1713 return res;
1714 }
1715 if (status > 0)
1716 PyErr_Format(PyExc_RuntimeError,
1717 "reentrant call inside %s.__repr__",
1718 Py_TYPE(self)->tp_name);
1719 return NULL;
1720 }
1721
1722 /*[clinic input]
1723 _elementtree.Element.set
1724
1725 key: object
1726 value: object
1727 /
1728
1729 [clinic start generated code]*/
1730
1731 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1732 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1733 PyObject *value)
1734 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1735 {
1736 PyObject* attrib;
1737
1738 if (!self->extra) {
1739 if (create_extra(self, NULL) < 0)
1740 return NULL;
1741 }
1742
1743 attrib = element_get_attrib(self);
1744 if (!attrib)
1745 return NULL;
1746
1747 if (PyDict_SetItem(attrib, key, value) < 0)
1748 return NULL;
1749
1750 Py_RETURN_NONE;
1751 }
1752
1753 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1754 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1755 {
1756 ElementObject* self = (ElementObject*) self_;
1757 Py_ssize_t i;
1758 PyObject* old;
1759
1760 if (!self->extra || index < 0 || index >= self->extra->length) {
1761 PyErr_SetString(
1762 PyExc_IndexError,
1763 "child assignment index out of range");
1764 return -1;
1765 }
1766
1767 old = self->extra->children[index];
1768
1769 if (item) {
1770 if (!Element_Check(item)) {
1771 raise_type_error(item);
1772 return -1;
1773 }
1774 Py_INCREF(item);
1775 self->extra->children[index] = item;
1776 } else {
1777 self->extra->length--;
1778 for (i = index; i < self->extra->length; i++)
1779 self->extra->children[i] = self->extra->children[i+1];
1780 }
1781
1782 Py_DECREF(old);
1783
1784 return 0;
1785 }
1786
1787 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1788 element_subscr(PyObject* self_, PyObject* item)
1789 {
1790 ElementObject* self = (ElementObject*) self_;
1791
1792 if (PyIndex_Check(item)) {
1793 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1794
1795 if (i == -1 && PyErr_Occurred()) {
1796 return NULL;
1797 }
1798 if (i < 0 && self->extra)
1799 i += self->extra->length;
1800 return element_getitem(self_, i);
1801 }
1802 else if (PySlice_Check(item)) {
1803 Py_ssize_t start, stop, step, slicelen, i;
1804 size_t cur;
1805 PyObject* list;
1806
1807 if (!self->extra)
1808 return PyList_New(0);
1809
1810 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1811 return NULL;
1812 }
1813 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1814 step);
1815
1816 if (slicelen <= 0)
1817 return PyList_New(0);
1818 else {
1819 list = PyList_New(slicelen);
1820 if (!list)
1821 return NULL;
1822
1823 for (cur = start, i = 0; i < slicelen;
1824 cur += step, i++) {
1825 PyObject* item = self->extra->children[cur];
1826 Py_INCREF(item);
1827 PyList_SET_ITEM(list, i, item);
1828 }
1829
1830 return list;
1831 }
1832 }
1833 else {
1834 PyErr_SetString(PyExc_TypeError,
1835 "element indices must be integers");
1836 return NULL;
1837 }
1838 }
1839
1840 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1841 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1842 {
1843 ElementObject* self = (ElementObject*) self_;
1844
1845 if (PyIndex_Check(item)) {
1846 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1847
1848 if (i == -1 && PyErr_Occurred()) {
1849 return -1;
1850 }
1851 if (i < 0 && self->extra)
1852 i += self->extra->length;
1853 return element_setitem(self_, i, value);
1854 }
1855 else if (PySlice_Check(item)) {
1856 Py_ssize_t start, stop, step, slicelen, newlen, i;
1857 size_t cur;
1858
1859 PyObject* recycle = NULL;
1860 PyObject* seq;
1861
1862 if (!self->extra) {
1863 if (create_extra(self, NULL) < 0)
1864 return -1;
1865 }
1866
1867 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1868 return -1;
1869 }
1870 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1871 step);
1872
1873 if (value == NULL) {
1874 /* Delete slice */
1875 size_t cur;
1876 Py_ssize_t i;
1877
1878 if (slicelen <= 0)
1879 return 0;
1880
1881 /* Since we're deleting, the direction of the range doesn't matter,
1882 * so for simplicity make it always ascending.
1883 */
1884 if (step < 0) {
1885 stop = start + 1;
1886 start = stop + step * (slicelen - 1) - 1;
1887 step = -step;
1888 }
1889
1890 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1891
1892 /* recycle is a list that will contain all the children
1893 * scheduled for removal.
1894 */
1895 if (!(recycle = PyList_New(slicelen))) {
1896 return -1;
1897 }
1898
1899 /* This loop walks over all the children that have to be deleted,
1900 * with cur pointing at them. num_moved is the amount of children
1901 * until the next deleted child that have to be "shifted down" to
1902 * occupy the deleted's places.
1903 * Note that in the ith iteration, shifting is done i+i places down
1904 * because i children were already removed.
1905 */
1906 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1907 /* Compute how many children have to be moved, clipping at the
1908 * list end.
1909 */
1910 Py_ssize_t num_moved = step - 1;
1911 if (cur + step >= (size_t)self->extra->length) {
1912 num_moved = self->extra->length - cur - 1;
1913 }
1914
1915 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1916
1917 memmove(
1918 self->extra->children + cur - i,
1919 self->extra->children + cur + 1,
1920 num_moved * sizeof(PyObject *));
1921 }
1922
1923 /* Leftover "tail" after the last removed child */
1924 cur = start + (size_t)slicelen * step;
1925 if (cur < (size_t)self->extra->length) {
1926 memmove(
1927 self->extra->children + cur - slicelen,
1928 self->extra->children + cur,
1929 (self->extra->length - cur) * sizeof(PyObject *));
1930 }
1931
1932 self->extra->length -= slicelen;
1933
1934 /* Discard the recycle list with all the deleted sub-elements */
1935 Py_DECREF(recycle);
1936 return 0;
1937 }
1938
1939 /* A new slice is actually being assigned */
1940 seq = PySequence_Fast(value, "");
1941 if (!seq) {
1942 PyErr_Format(
1943 PyExc_TypeError,
1944 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1945 );
1946 return -1;
1947 }
1948 newlen = PySequence_Fast_GET_SIZE(seq);
1949
1950 if (step != 1 && newlen != slicelen)
1951 {
1952 Py_DECREF(seq);
1953 PyErr_Format(PyExc_ValueError,
1954 "attempt to assign sequence of size %zd "
1955 "to extended slice of size %zd",
1956 newlen, slicelen
1957 );
1958 return -1;
1959 }
1960
1961 /* Resize before creating the recycle bin, to prevent refleaks. */
1962 if (newlen > slicelen) {
1963 if (element_resize(self, newlen - slicelen) < 0) {
1964 Py_DECREF(seq);
1965 return -1;
1966 }
1967 }
1968
1969 for (i = 0; i < newlen; i++) {
1970 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1971 if (!Element_Check(element)) {
1972 raise_type_error(element);
1973 Py_DECREF(seq);
1974 return -1;
1975 }
1976 }
1977
1978 if (slicelen > 0) {
1979 /* to avoid recursive calls to this method (via decref), move
1980 old items to the recycle bin here, and get rid of them when
1981 we're done modifying the element */
1982 recycle = PyList_New(slicelen);
1983 if (!recycle) {
1984 Py_DECREF(seq);
1985 return -1;
1986 }
1987 for (cur = start, i = 0; i < slicelen;
1988 cur += step, i++)
1989 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1990 }
1991
1992 if (newlen < slicelen) {
1993 /* delete slice */
1994 for (i = stop; i < self->extra->length; i++)
1995 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1996 } else if (newlen > slicelen) {
1997 /* insert slice */
1998 for (i = self->extra->length-1; i >= stop; i--)
1999 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
2000 }
2001
2002 /* replace the slice */
2003 for (cur = start, i = 0; i < newlen;
2004 cur += step, i++) {
2005 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2006 Py_INCREF(element);
2007 self->extra->children[cur] = element;
2008 }
2009
2010 self->extra->length += newlen - slicelen;
2011
2012 Py_DECREF(seq);
2013
2014 /* discard the recycle bin, and everything in it */
2015 Py_XDECREF(recycle);
2016
2017 return 0;
2018 }
2019 else {
2020 PyErr_SetString(PyExc_TypeError,
2021 "element indices must be integers");
2022 return -1;
2023 }
2024 }
2025
2026 static PyObject*
element_tag_getter(ElementObject * self,void * closure)2027 element_tag_getter(ElementObject *self, void *closure)
2028 {
2029 PyObject *res = self->tag;
2030 Py_INCREF(res);
2031 return res;
2032 }
2033
2034 static PyObject*
element_text_getter(ElementObject * self,void * closure)2035 element_text_getter(ElementObject *self, void *closure)
2036 {
2037 PyObject *res = element_get_text(self);
2038 Py_XINCREF(res);
2039 return res;
2040 }
2041
2042 static PyObject*
element_tail_getter(ElementObject * self,void * closure)2043 element_tail_getter(ElementObject *self, void *closure)
2044 {
2045 PyObject *res = element_get_tail(self);
2046 Py_XINCREF(res);
2047 return res;
2048 }
2049
2050 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)2051 element_attrib_getter(ElementObject *self, void *closure)
2052 {
2053 PyObject *res;
2054 if (!self->extra) {
2055 if (create_extra(self, NULL) < 0)
2056 return NULL;
2057 }
2058 res = element_get_attrib(self);
2059 Py_XINCREF(res);
2060 return res;
2061 }
2062
/* macro for setter validation: a NULL value means the attribute is being
   deleted, which is refused with AttributeError by returning -1 from the
   *enclosing* function.  Wrapped in do { } while (0) so that the macro
   behaves as a single statement (no dangling-else surprises); invoke it
   with a trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2071
2072 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2073 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2074 {
2075 _VALIDATE_ATTR_VALUE(value);
2076 Py_INCREF(value);
2077 Py_SETREF(self->tag, value);
2078 return 0;
2079 }
2080
2081 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2082 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2083 {
2084 _VALIDATE_ATTR_VALUE(value);
2085 Py_INCREF(value);
2086 _set_joined_ptr(&self->text, value);
2087 return 0;
2088 }
2089
2090 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2091 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2092 {
2093 _VALIDATE_ATTR_VALUE(value);
2094 Py_INCREF(value);
2095 _set_joined_ptr(&self->tail, value);
2096 return 0;
2097 }
2098
2099 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2100 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2101 {
2102 _VALIDATE_ATTR_VALUE(value);
2103 if (!self->extra) {
2104 if (create_extra(self, NULL) < 0)
2105 return -1;
2106 }
2107 Py_INCREF(value);
2108 Py_SETREF(self->extra->attrib, value);
2109 return 0;
2110 }
2111
/* Sequence protocol slots for Element: length, integer indexing and
 * integer item assignment/deletion.  All other slots are left empty. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,   /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,            /* sq_item */
    0,                          /* was_sq_slice */
    element_setitem,            /* sq_ass_item */
    0,                          /* was_sq_ass_slice */
};
2121
2122 /******************************* Element iterator ****************************/
2123
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* saved parent element; the stack holds a
                                   reference to it (see
                                   parent_stack_push_new) */
    Py_ssize_t child_index;     /* index of the next child of 'parent' to
                                   examine */
} ParentLocator;
2136
/* Iterator object shared by Element.iter() and Element.itertext(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element the iteration starts from;
                                       set to NULL once elementiter_next has
                                       consumed it */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2146
2147
2148 static void
elementiter_dealloc(ElementIterObject * it)2149 elementiter_dealloc(ElementIterObject *it)
2150 {
2151 Py_ssize_t i = it->parent_stack_used;
2152 it->parent_stack_used = 0;
2153 /* bpo-31095: UnTrack is needed before calling any callbacks */
2154 PyObject_GC_UnTrack(it);
2155 while (i--)
2156 Py_XDECREF(it->parent_stack[i].parent);
2157 PyMem_Free(it->parent_stack);
2158
2159 Py_XDECREF(it->sought_tag);
2160 Py_XDECREF(it->root_element);
2161
2162 PyObject_GC_Del(it);
2163 }
2164
2165 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2166 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2167 {
2168 Py_ssize_t i = it->parent_stack_used;
2169 while (i--)
2170 Py_VISIT(it->parent_stack[i].parent);
2171
2172 Py_VISIT(it->root_element);
2173 Py_VISIT(it->sought_tag);
2174 return 0;
2175 }
2176
2177 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2178 */
2179 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2180 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2181 {
2182 ParentLocator *item;
2183
2184 if (it->parent_stack_used >= it->parent_stack_size) {
2185 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2186 ParentLocator *parent_stack = it->parent_stack;
2187 PyMem_Resize(parent_stack, ParentLocator, new_size);
2188 if (parent_stack == NULL)
2189 return -1;
2190 it->parent_stack = parent_stack;
2191 it->parent_stack_size = new_size;
2192 }
2193 item = it->parent_stack + it->parent_stack_used++;
2194 Py_INCREF(parent);
2195 item->parent = parent;
2196 item->child_index = 0;
2197 return 0;
2198 }
2199
/* tp_iternext for the element iterator: returns the next element (or, in
 * gettext mode, the next non-empty text/tail object) as a new reference,
 * or NULL with StopIteration set once the traversal is exhausted.  NULL is
 * also returned, with the corresponding exception set, when growing the
 * parent stack fails or a comparison / truth test raises. */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Popping: the reference the stack held on elem is now
                 * ours and is released below or at the gettext label. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* our own reference to the child, in addition to the one the
             * push below takes for the stack */
            Py_INCREF(elem);
        }

        /* elem becomes the current parent whose children are visited next */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter() mode: None matches every element */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Reached with one owned reference on elem and a borrowed (or
         * NULL) 'text'.  NULL is treated as an error; None and falsy text
         * are skipped and the loop continues. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* take a reference on text before letting go of elem */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* not reached: the loop above only exits through return */
    return NULL;
}
2305
2306
/* Type object for the element iterators allocated by create_elementiter().
 *
 * The type takes part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC with
 * a tp_traverse slot; no tp_clear is installed).  An instance is its own
 * iterator (tp_iter is PyObject_SelfIter) and produces its items through
 * elementiter_next().  All remaining slots are left empty.
 */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2350
2351 #define INIT_PARENT_STACK_SIZE 8
2352
2353 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2354 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2355 {
2356 ElementIterObject *it;
2357
2358 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2359 if (!it)
2360 return NULL;
2361
2362 Py_INCREF(tag);
2363 it->sought_tag = tag;
2364 it->gettext = gettext;
2365 Py_INCREF(self);
2366 it->root_element = self;
2367
2368 PyObject_GC_Track(it);
2369
2370 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2371 if (it->parent_stack == NULL) {
2372 Py_DECREF(it);
2373 PyErr_NoMemory();
2374 return NULL;
2375 }
2376 it->parent_stack_used = 0;
2377 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2378
2379 return (PyObject *)it;
2380 }
2381
2382
2383 /* ==================================================================== */
2384 /* the tree builder type */
2385
/* Instance layout of the C TreeBuilder.  Pointer members are owned
   references unless they are NULL; treebuilder_gc_clear() releases them. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Callables used to build nodes.  A NULL element_factory makes
       treebuilder_handle_start() fall back to create_new_element(); a NULL
       comment_factory / pi_factory makes the comment / pi handlers return
       their raw arguments instead of a node. */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* When non-zero, nodes produced by the comment / pi factory are also
       appended to the current node. */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True only for instances whose type is exactly TreeBuilder_Type
   (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
2418
2419 /* -------------------------------------------------------------------- */
2420 /* constructor and destructor */
2421
2422 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2423 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2424 {
2425 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2426 if (t != NULL) {
2427 t->root = NULL;
2428
2429 Py_INCREF(Py_None);
2430 t->this = Py_None;
2431 Py_INCREF(Py_None);
2432 t->last = Py_None;
2433
2434 t->data = NULL;
2435 t->element_factory = NULL;
2436 t->comment_factory = NULL;
2437 t->pi_factory = NULL;
2438 t->stack = PyList_New(20);
2439 if (!t->stack) {
2440 Py_DECREF(t->this);
2441 Py_DECREF(t->last);
2442 Py_DECREF((PyObject *) t);
2443 return NULL;
2444 }
2445 t->index = 0;
2446
2447 t->events_append = NULL;
2448 t->start_event_obj = t->end_event_obj = NULL;
2449 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2450 t->comment_event_obj = t->pi_event_obj = NULL;
2451 t->insert_comments = t->insert_pis = 0;
2452 }
2453 return (PyObject *)t;
2454 }
2455
2456 /*[clinic input]
2457 _elementtree.TreeBuilder.__init__
2458
2459 element_factory: object = None
2460 *
2461 comment_factory: object = None
2462 pi_factory: object = None
2463 insert_comments: bool = False
2464 insert_pis: bool = False
2465
2466 [clinic start generated code]*/
2467
2468 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2469 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2470 PyObject *element_factory,
2471 PyObject *comment_factory,
2472 PyObject *pi_factory,
2473 int insert_comments, int insert_pis)
2474 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2475 {
2476 if (element_factory != Py_None) {
2477 Py_INCREF(element_factory);
2478 Py_XSETREF(self->element_factory, element_factory);
2479 } else {
2480 Py_CLEAR(self->element_factory);
2481 }
2482
2483 if (comment_factory == Py_None) {
2484 elementtreestate *st = ET_STATE_GLOBAL;
2485 comment_factory = st->comment_factory;
2486 }
2487 if (comment_factory) {
2488 Py_INCREF(comment_factory);
2489 Py_XSETREF(self->comment_factory, comment_factory);
2490 self->insert_comments = insert_comments;
2491 } else {
2492 Py_CLEAR(self->comment_factory);
2493 self->insert_comments = 0;
2494 }
2495
2496 if (pi_factory == Py_None) {
2497 elementtreestate *st = ET_STATE_GLOBAL;
2498 pi_factory = st->pi_factory;
2499 }
2500 if (pi_factory) {
2501 Py_INCREF(pi_factory);
2502 Py_XSETREF(self->pi_factory, pi_factory);
2503 self->insert_pis = insert_pis;
2504 } else {
2505 Py_CLEAR(self->pi_factory);
2506 self->insert_pis = 0;
2507 }
2508
2509 return 0;
2510 }
2511
/* tp_traverse helper: report every object reference held by the builder
   to `visit`.  Py_VISIT skips NULL members and returns early from this
   function if the callback returns a non-zero value; otherwise 0 is
   returned. */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    /* event tracing objects */
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    /* factories */
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
2533
/* Release every object reference held by the builder and leave the
   corresponding members NULL (Py_CLEAR resets the member before dropping
   the reference).  Also used by treebuilder_dealloc().  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event tracing objects */
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    /* factories, then the root of the tree */
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
2555
/* Deallocator: stop GC tracking first, release all held references via
   treebuilder_gc_clear(), then return the memory through the type's
   tp_free slot. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2563
2564 /* -------------------------------------------------------------------- */
2565 /* helpers for handling of arbitrary element-like objects */
2566
2567 /*[clinic input]
2568 _elementtree._set_factories
2569
2570 comment_factory: object
2571 pi_factory: object
2572 /
2573
2574 Change the factories used to create comments and processing instructions.
2575
2576 For internal use only.
2577 [clinic start generated code]*/
2578
2579 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2580 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2581 PyObject *pi_factory)
2582 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2583 {
2584 elementtreestate *st = ET_STATE_GLOBAL;
2585 PyObject *old;
2586
2587 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2588 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2589 Py_TYPE(comment_factory)->tp_name);
2590 return NULL;
2591 }
2592 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2593 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2594 Py_TYPE(pi_factory)->tp_name);
2595 return NULL;
2596 }
2597
2598 old = PyTuple_Pack(2,
2599 st->comment_factory ? st->comment_factory : Py_None,
2600 st->pi_factory ? st->pi_factory : Py_None);
2601
2602 if (comment_factory == Py_None) {
2603 Py_CLEAR(st->comment_factory);
2604 } else {
2605 Py_INCREF(comment_factory);
2606 Py_XSETREF(st->comment_factory, comment_factory);
2607 }
2608 if (pi_factory == Py_None) {
2609 Py_CLEAR(st->pi_factory);
2610 } else {
2611 Py_INCREF(pi_factory);
2612 Py_XSETREF(st->pi_factory, pi_factory);
2613 }
2614
2615 return old;
2616 }
2617
2618 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2619 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2620 PyObject **dest, _Py_Identifier *name)
2621 {
2622 /* Fast paths for the "almost always" cases. */
2623 if (Element_CheckExact(element)) {
2624 PyObject *dest_obj = JOIN_OBJ(*dest);
2625 if (dest_obj == Py_None) {
2626 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2627 *data = NULL;
2628 Py_DECREF(dest_obj);
2629 return 0;
2630 }
2631 else if (JOIN_GET(*dest)) {
2632 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2633 return -1;
2634 }
2635 Py_CLEAR(*data);
2636 return 0;
2637 }
2638 }
2639
2640 /* Fallback for the non-Element / non-trivial cases. */
2641 {
2642 int r;
2643 PyObject* joined;
2644 PyObject* previous = _PyObject_GetAttrId(element, name);
2645 if (!previous)
2646 return -1;
2647 joined = list_join(*data);
2648 if (!joined) {
2649 Py_DECREF(previous);
2650 return -1;
2651 }
2652 if (previous != Py_None) {
2653 PyObject *tmp = PyNumber_Add(previous, joined);
2654 Py_DECREF(joined);
2655 Py_DECREF(previous);
2656 if (!tmp)
2657 return -1;
2658 joined = tmp;
2659 } else {
2660 Py_DECREF(previous);
2661 }
2662
2663 r = _PyObject_SetAttrId(element, name, joined);
2664 Py_DECREF(joined);
2665 if (r < 0)
2666 return -1;
2667 Py_CLEAR(*data);
2668 return 0;
2669 }
2670 }
2671
2672 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2673 treebuilder_flush_data(TreeBuilderObject* self)
2674 {
2675 if (!self->data) {
2676 return 0;
2677 }
2678
2679 if (!self->last_for_tail) {
2680 PyObject *element = self->last;
2681 _Py_IDENTIFIER(text);
2682 return treebuilder_extend_element_text_or_tail(
2683 element, &self->data,
2684 &((ElementObject *) element)->text, &PyId_text);
2685 }
2686 else {
2687 PyObject *element = self->last_for_tail;
2688 _Py_IDENTIFIER(tail);
2689 return treebuilder_extend_element_text_or_tail(
2690 element, &self->data,
2691 &((ElementObject *) element)->tail, &PyId_tail);
2692 }
2693 }
2694
2695 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2696 treebuilder_add_subelement(PyObject *element, PyObject *child)
2697 {
2698 _Py_IDENTIFIER(append);
2699 if (Element_CheckExact(element)) {
2700 ElementObject *elem = (ElementObject *) element;
2701 return element_add_subelement(elem, child);
2702 }
2703 else {
2704 PyObject *res;
2705 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
2706 if (res == NULL)
2707 return -1;
2708 Py_DECREF(res);
2709 return 0;
2710 }
2711 }
2712
2713 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2714 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2715 PyObject *node)
2716 {
2717 if (action != NULL) {
2718 PyObject *res;
2719 PyObject *event = PyTuple_Pack(2, action, node);
2720 if (event == NULL)
2721 return -1;
2722 res = _PyObject_FastCall(self->events_append, &event, 1);
2723 Py_DECREF(event);
2724 if (res == NULL)
2725 return -1;
2726 Py_DECREF(res);
2727 }
2728 return 0;
2729 }
2730
2731 /* -------------------------------------------------------------------- */
2732 /* handlers */
2733
2734 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2735 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2736 PyObject* attrib)
2737 {
2738 PyObject* node;
2739 PyObject* this;
2740 elementtreestate *st = ET_STATE_GLOBAL;
2741
2742 if (treebuilder_flush_data(self) < 0) {
2743 return NULL;
2744 }
2745
2746 if (!self->element_factory) {
2747 node = create_new_element(tag, attrib);
2748 } else if (attrib == Py_None) {
2749 attrib = PyDict_New();
2750 if (!attrib)
2751 return NULL;
2752 node = PyObject_CallFunctionObjArgs(self->element_factory,
2753 tag, attrib, NULL);
2754 Py_DECREF(attrib);
2755 }
2756 else {
2757 node = PyObject_CallFunctionObjArgs(self->element_factory,
2758 tag, attrib, NULL);
2759 }
2760 if (!node) {
2761 return NULL;
2762 }
2763
2764 this = self->this;
2765 Py_CLEAR(self->last_for_tail);
2766
2767 if (this != Py_None) {
2768 if (treebuilder_add_subelement(this, node) < 0)
2769 goto error;
2770 } else {
2771 if (self->root) {
2772 PyErr_SetString(
2773 st->parseerror_obj,
2774 "multiple elements on top level"
2775 );
2776 goto error;
2777 }
2778 Py_INCREF(node);
2779 self->root = node;
2780 }
2781
2782 if (self->index < PyList_GET_SIZE(self->stack)) {
2783 if (PyList_SetItem(self->stack, self->index, this) < 0)
2784 goto error;
2785 Py_INCREF(this);
2786 } else {
2787 if (PyList_Append(self->stack, this) < 0)
2788 goto error;
2789 }
2790 self->index++;
2791
2792 Py_INCREF(node);
2793 Py_SETREF(self->this, node);
2794 Py_INCREF(node);
2795 Py_SETREF(self->last, node);
2796
2797 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2798 goto error;
2799
2800 return node;
2801
2802 error:
2803 Py_DECREF(node);
2804 return NULL;
2805 }
2806
2807 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2808 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2809 {
2810 if (!self->data) {
2811 if (self->last == Py_None) {
2812 /* ignore calls to data before the first call to start */
2813 Py_RETURN_NONE;
2814 }
2815 /* store the first item as is */
2816 Py_INCREF(data); self->data = data;
2817 } else {
2818 /* more than one item; use a list to collect items */
2819 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2820 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2821 /* XXX this code path unused in Python 3? */
2822 /* expat often generates single character data sections; handle
2823 the most common case by resizing the existing string... */
2824 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2825 if (_PyBytes_Resize(&self->data, size + 1) < 0)
2826 return NULL;
2827 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2828 } else if (PyList_CheckExact(self->data)) {
2829 if (PyList_Append(self->data, data) < 0)
2830 return NULL;
2831 } else {
2832 PyObject* list = PyList_New(2);
2833 if (!list)
2834 return NULL;
2835 PyList_SET_ITEM(list, 0, self->data);
2836 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2837 self->data = list;
2838 }
2839 }
2840
2841 Py_RETURN_NONE;
2842 }
2843
2844 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2845 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2846 {
2847 PyObject* item;
2848
2849 if (treebuilder_flush_data(self) < 0) {
2850 return NULL;
2851 }
2852
2853 if (self->index == 0) {
2854 PyErr_SetString(
2855 PyExc_IndexError,
2856 "pop from empty stack"
2857 );
2858 return NULL;
2859 }
2860
2861 item = self->last;
2862 self->last = self->this;
2863 Py_INCREF(self->last);
2864 Py_XSETREF(self->last_for_tail, self->last);
2865 self->index--;
2866 self->this = PyList_GET_ITEM(self->stack, self->index);
2867 Py_INCREF(self->this);
2868 Py_DECREF(item);
2869
2870 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2871 return NULL;
2872
2873 Py_INCREF(self->last);
2874 return (PyObject*) self->last;
2875 }
2876
2877 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2878 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2879 {
2880 PyObject* comment;
2881 PyObject* this;
2882
2883 if (treebuilder_flush_data(self) < 0) {
2884 return NULL;
2885 }
2886
2887 if (self->comment_factory) {
2888 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2889 if (!comment)
2890 return NULL;
2891
2892 this = self->this;
2893 if (self->insert_comments && this != Py_None) {
2894 if (treebuilder_add_subelement(this, comment) < 0)
2895 goto error;
2896 Py_INCREF(comment);
2897 Py_XSETREF(self->last_for_tail, comment);
2898 }
2899 } else {
2900 Py_INCREF(text);
2901 comment = text;
2902 }
2903
2904 if (self->events_append && self->comment_event_obj) {
2905 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2906 goto error;
2907 }
2908
2909 return comment;
2910
2911 error:
2912 Py_DECREF(comment);
2913 return NULL;
2914 }
2915
2916 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2917 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2918 {
2919 PyObject* pi;
2920 PyObject* this;
2921 PyObject* stack[2] = {target, text};
2922
2923 if (treebuilder_flush_data(self) < 0) {
2924 return NULL;
2925 }
2926
2927 if (self->pi_factory) {
2928 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2929 if (!pi) {
2930 return NULL;
2931 }
2932
2933 this = self->this;
2934 if (self->insert_pis && this != Py_None) {
2935 if (treebuilder_add_subelement(this, pi) < 0)
2936 goto error;
2937 Py_INCREF(pi);
2938 Py_XSETREF(self->last_for_tail, pi);
2939 }
2940 } else {
2941 pi = PyTuple_Pack(2, target, text);
2942 if (!pi) {
2943 return NULL;
2944 }
2945 }
2946
2947 if (self->events_append && self->pi_event_obj) {
2948 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2949 goto error;
2950 }
2951
2952 return pi;
2953
2954 error:
2955 Py_DECREF(pi);
2956 return NULL;
2957 }
2958
2959 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2960 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2961 {
2962 PyObject* parcel;
2963
2964 if (self->events_append && self->start_ns_event_obj) {
2965 parcel = PyTuple_Pack(2, prefix, uri);
2966 if (!parcel) {
2967 return NULL;
2968 }
2969
2970 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2971 Py_DECREF(parcel);
2972 return NULL;
2973 }
2974 Py_DECREF(parcel);
2975 }
2976
2977 Py_RETURN_NONE;
2978 }
2979
2980 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2981 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2982 {
2983 if (self->events_append && self->end_ns_event_obj) {
2984 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2985 return NULL;
2986 }
2987 }
2988
2989 Py_RETURN_NONE;
2990 }
2991
2992 /* -------------------------------------------------------------------- */
2993 /* methods (in alphabetical order) */
2994
2995 /*[clinic input]
2996 _elementtree.TreeBuilder.data
2997
2998 data: object
2999 /
3000
3001 [clinic start generated code]*/
3002
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* TreeBuilder.data(data): thin wrapper that forwards to the C-level
       handler and returns its result unchanged. */
    return treebuilder_handle_data(self, data);
}
3009
3010 /*[clinic input]
3011 _elementtree.TreeBuilder.end
3012
3013 tag: object
3014 /
3015
3016 [clinic start generated code]*/
3017
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* TreeBuilder.end(tag): thin wrapper around the C-level handler.
       The handler accepts `tag` but never reads it. */
    return treebuilder_handle_end(self, tag);
}
3024
3025 /*[clinic input]
3026 _elementtree.TreeBuilder.comment
3027
3028 text: object
3029 /
3030
3031 [clinic start generated code]*/
3032
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
    /* TreeBuilder.comment(text): thin wrapper that forwards to the C-level
       handler and returns its result unchanged. */
    return treebuilder_handle_comment(self, text);
}
3039
3040 /*[clinic input]
3041 _elementtree.TreeBuilder.pi
3042
3043 target: object
3044 text: object = None
3045 /
3046
3047 [clinic start generated code]*/
3048
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
                                 PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
    /* TreeBuilder.pi(target, text=None): thin wrapper that forwards to the
       C-level handler and returns its result unchanged. */
    return treebuilder_handle_pi(self, target, text);
}
3056
3057 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)3058 treebuilder_done(TreeBuilderObject* self)
3059 {
3060 PyObject* res;
3061
3062 /* FIXME: check stack size? */
3063
3064 if (self->root)
3065 res = self->root;
3066 else
3067 res = Py_None;
3068
3069 Py_INCREF(res);
3070 return res;
3071 }
3072
3073 /*[clinic input]
3074 _elementtree.TreeBuilder.close
3075
3076 [clinic start generated code]*/
3077
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* TreeBuilder.close(): returns what treebuilder_done() returns, i.e. a
       new reference to the root element or to None. */
    return treebuilder_done(self);
}
3084
3085 /*[clinic input]
3086 _elementtree.TreeBuilder.start
3087
3088 tag: object
3089 attrs: object = None
3090 /
3091
3092 [clinic start generated code]*/
3093
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
{
    /* TreeBuilder.start(tag, attrs=None): thin wrapper that forwards to
       the C-level handler and returns its result unchanged. */
    return treebuilder_handle_start(self, tag, attrs);
}
3101
3102 /* ==================================================================== */
3103 /* the expat interface */
3104
3105 #include "expat.h"
3106 #include "pyexpat.h"
3107
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Look up an expat entry point in the cached table: EXPAT(func)(args). */
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite built from the Python object allocator.
   NOTE(review): presumably handed to expat when a parser is created;
   confirm at the call site, which is outside this section. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3116
/* Instance layout of the XML parser object wrapped around expat. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;          /* underlying expat parser */

    PyObject *target;           /* object receiving the parse events; an
                                   exact TreeBuilder is driven directly
                                   through the treebuilder_handle_*()
                                   functions */
    PyObject *entity;           /* mapping consulted by
                                   expat_default_handler() to resolve
                                   entity names */

    PyObject *names;            /* cache used by makeuniversal(): raw
                                   UTF-8 name (bytes) -> decoded universal
                                   name (str) */

    /* Callables invoked by the expat handlers when the target is not an
       exact TreeBuilder; a NULL entry means the event is ignored.
       NOTE(review): presumably the target's bound methods; they are set
       up outside this section. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3140
3141 /* helpers */
3142
3143 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3144 makeuniversal(XMLParserObject* self, const char* string)
3145 {
3146 /* convert a UTF-8 tag/attribute name from the expat parser
3147 to a universal name string */
3148
3149 Py_ssize_t size = (Py_ssize_t) strlen(string);
3150 PyObject* key;
3151 PyObject* value;
3152
3153 /* look the 'raw' name up in the names dictionary */
3154 key = PyBytes_FromStringAndSize(string, size);
3155 if (!key)
3156 return NULL;
3157
3158 value = PyDict_GetItemWithError(self->names, key);
3159
3160 if (value) {
3161 Py_INCREF(value);
3162 }
3163 else if (!PyErr_Occurred()) {
3164 /* new name. convert to universal name, and decode as
3165 necessary */
3166
3167 PyObject* tag;
3168 char* p;
3169 Py_ssize_t i;
3170
3171 /* look for namespace separator */
3172 for (i = 0; i < size; i++)
3173 if (string[i] == '}')
3174 break;
3175 if (i != size) {
3176 /* convert to universal name */
3177 tag = PyBytes_FromStringAndSize(NULL, size+1);
3178 if (tag == NULL) {
3179 Py_DECREF(key);
3180 return NULL;
3181 }
3182 p = PyBytes_AS_STRING(tag);
3183 p[0] = '{';
3184 memcpy(p+1, string, size);
3185 size++;
3186 } else {
3187 /* plain name; use key as tag */
3188 Py_INCREF(key);
3189 tag = key;
3190 }
3191
3192 /* decode universal name */
3193 p = PyBytes_AS_STRING(tag);
3194 value = PyUnicode_DecodeUTF8(p, size, "strict");
3195 Py_DECREF(tag);
3196 if (!value) {
3197 Py_DECREF(key);
3198 return NULL;
3199 }
3200
3201 /* add to names dictionary */
3202 if (PyDict_SetItem(self->names, key, value) < 0) {
3203 Py_DECREF(key);
3204 Py_DECREF(value);
3205 return NULL;
3206 }
3207 }
3208
3209 Py_DECREF(key);
3210 return value;
3211 }
3212
3213 /* Set the ParseError exception with the given parameters.
3214 * If message is not NULL, it's used as the error string. Otherwise, the
3215 * message string is the default for the given error_code.
3216 */
3217 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3218 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3219 const char *message)
3220 {
3221 PyObject *errmsg, *error, *position, *code;
3222 elementtreestate *st = ET_STATE_GLOBAL;
3223
3224 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3225 message ? message : EXPAT(ErrorString)(error_code),
3226 line, column);
3227 if (errmsg == NULL)
3228 return;
3229
3230 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
3231 Py_DECREF(errmsg);
3232 if (!error)
3233 return;
3234
3235 /* Add code and position attributes */
3236 code = PyLong_FromLong((long)error_code);
3237 if (!code) {
3238 Py_DECREF(error);
3239 return;
3240 }
3241 if (PyObject_SetAttrString(error, "code", code) == -1) {
3242 Py_DECREF(error);
3243 Py_DECREF(code);
3244 return;
3245 }
3246 Py_DECREF(code);
3247
3248 position = Py_BuildValue("(nn)", line, column);
3249 if (!position) {
3250 Py_DECREF(error);
3251 return;
3252 }
3253 if (PyObject_SetAttrString(error, "position", position) == -1) {
3254 Py_DECREF(error);
3255 Py_DECREF(position);
3256 return;
3257 }
3258 Py_DECREF(position);
3259
3260 PyErr_SetObject(st->parseerror_obj, error);
3261 Py_DECREF(error);
3262 }
3263
3264 /* -------------------------------------------------------------------- */
3265 /* handlers */
3266
3267 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3268 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3269 int data_len)
3270 {
3271 PyObject* key;
3272 PyObject* value;
3273 PyObject* res;
3274
3275 if (data_len < 2 || data_in[0] != '&')
3276 return;
3277
3278 if (PyErr_Occurred())
3279 return;
3280
3281 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3282 if (!key)
3283 return;
3284
3285 value = PyDict_GetItemWithError(self->entity, key);
3286
3287 if (value) {
3288 if (TreeBuilder_CheckExact(self->target))
3289 res = treebuilder_handle_data(
3290 (TreeBuilderObject*) self->target, value
3291 );
3292 else if (self->handle_data)
3293 res = _PyObject_FastCall(self->handle_data, &value, 1);
3294 else
3295 res = NULL;
3296 Py_XDECREF(res);
3297 } else if (!PyErr_Occurred()) {
3298 /* Report the first error, not the last */
3299 char message[128] = "undefined entity ";
3300 strncat(message, data_in, data_len < 100?data_len:100);
3301 expat_set_error(
3302 XML_ERROR_UNDEFINED_ENTITY,
3303 EXPAT(GetErrorLineNumber)(self->parser),
3304 EXPAT(GetErrorColumnNumber)(self->parser),
3305 message
3306 );
3307 }
3308
3309 Py_DECREF(key);
3310 }
3311
3312 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3313 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3314 const XML_Char **attrib_in)
3315 {
3316 PyObject* res;
3317 PyObject* tag;
3318 PyObject* attrib;
3319 int ok;
3320
3321 if (PyErr_Occurred())
3322 return;
3323
3324 /* tag name */
3325 tag = makeuniversal(self, tag_in);
3326 if (!tag)
3327 return; /* parser will look for errors */
3328
3329 /* attributes */
3330 if (attrib_in[0]) {
3331 attrib = PyDict_New();
3332 if (!attrib) {
3333 Py_DECREF(tag);
3334 return;
3335 }
3336 while (attrib_in[0] && attrib_in[1]) {
3337 PyObject* key = makeuniversal(self, attrib_in[0]);
3338 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3339 if (!key || !value) {
3340 Py_XDECREF(value);
3341 Py_XDECREF(key);
3342 Py_DECREF(attrib);
3343 Py_DECREF(tag);
3344 return;
3345 }
3346 ok = PyDict_SetItem(attrib, key, value);
3347 Py_DECREF(value);
3348 Py_DECREF(key);
3349 if (ok < 0) {
3350 Py_DECREF(attrib);
3351 Py_DECREF(tag);
3352 return;
3353 }
3354 attrib_in += 2;
3355 }
3356 } else {
3357 Py_INCREF(Py_None);
3358 attrib = Py_None;
3359 }
3360
3361 if (TreeBuilder_CheckExact(self->target)) {
3362 /* shortcut */
3363 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3364 tag, attrib);
3365 }
3366 else if (self->handle_start) {
3367 if (attrib == Py_None) {
3368 Py_DECREF(attrib);
3369 attrib = PyDict_New();
3370 if (!attrib) {
3371 Py_DECREF(tag);
3372 return;
3373 }
3374 }
3375 res = PyObject_CallFunctionObjArgs(self->handle_start,
3376 tag, attrib, NULL);
3377 } else
3378 res = NULL;
3379
3380 Py_DECREF(tag);
3381 Py_DECREF(attrib);
3382
3383 Py_XDECREF(res);
3384 }
3385
3386 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3387 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3388 int data_len)
3389 {
3390 PyObject* data;
3391 PyObject* res;
3392
3393 if (PyErr_Occurred())
3394 return;
3395
3396 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3397 if (!data)
3398 return; /* parser will look for errors */
3399
3400 if (TreeBuilder_CheckExact(self->target))
3401 /* shortcut */
3402 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3403 else if (self->handle_data)
3404 res = _PyObject_FastCall(self->handle_data, &data, 1);
3405 else
3406 res = NULL;
3407
3408 Py_DECREF(data);
3409
3410 Py_XDECREF(res);
3411 }
3412
3413 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3414 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3415 {
3416 PyObject* tag;
3417 PyObject* res = NULL;
3418
3419 if (PyErr_Occurred())
3420 return;
3421
3422 if (TreeBuilder_CheckExact(self->target))
3423 /* shortcut */
3424 /* the standard tree builder doesn't look at the end tag */
3425 res = treebuilder_handle_end(
3426 (TreeBuilderObject*) self->target, Py_None
3427 );
3428 else if (self->handle_end) {
3429 tag = makeuniversal(self, tag_in);
3430 if (tag) {
3431 res = _PyObject_FastCall(self->handle_end, &tag, 1);
3432 Py_DECREF(tag);
3433 }
3434 }
3435
3436 Py_XDECREF(res);
3437 }
3438
3439 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3440 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3441 const XML_Char *uri_in)
3442 {
3443 PyObject* res = NULL;
3444 PyObject* uri;
3445 PyObject* prefix;
3446 PyObject* stack[2];
3447
3448 if (PyErr_Occurred())
3449 return;
3450
3451 if (!uri_in)
3452 uri_in = "";
3453 if (!prefix_in)
3454 prefix_in = "";
3455
3456 if (TreeBuilder_CheckExact(self->target)) {
3457 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3458 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3459
3460 if (target->events_append && target->start_ns_event_obj) {
3461 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3462 if (!prefix)
3463 return;
3464 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3465 if (!uri) {
3466 Py_DECREF(prefix);
3467 return;
3468 }
3469
3470 res = treebuilder_handle_start_ns(target, prefix, uri);
3471 Py_DECREF(uri);
3472 Py_DECREF(prefix);
3473 }
3474 } else if (self->handle_start_ns) {
3475 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3476 if (!prefix)
3477 return;
3478 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3479 if (!uri) {
3480 Py_DECREF(prefix);
3481 return;
3482 }
3483
3484 stack[0] = prefix;
3485 stack[1] = uri;
3486 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3487 Py_DECREF(uri);
3488 Py_DECREF(prefix);
3489 }
3490
3491 Py_XDECREF(res);
3492 }
3493
3494 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3495 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3496 {
3497 PyObject *res = NULL;
3498 PyObject* prefix;
3499
3500 if (PyErr_Occurred())
3501 return;
3502
3503 if (!prefix_in)
3504 prefix_in = "";
3505
3506 if (TreeBuilder_CheckExact(self->target)) {
3507 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3508 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3509
3510 if (target->events_append && target->end_ns_event_obj) {
3511 res = treebuilder_handle_end_ns(target, Py_None);
3512 }
3513 } else if (self->handle_end_ns) {
3514 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3515 if (!prefix)
3516 return;
3517
3518 res = _PyObject_FastCall(self->handle_end_ns, &prefix, 1);
3519 Py_DECREF(prefix);
3520 }
3521
3522 Py_XDECREF(res);
3523 }
3524
3525 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3526 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3527 {
3528 PyObject* comment;
3529 PyObject* res;
3530
3531 if (PyErr_Occurred())
3532 return;
3533
3534 if (TreeBuilder_CheckExact(self->target)) {
3535 /* shortcut */
3536 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3537
3538 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3539 if (!comment)
3540 return; /* parser will look for errors */
3541
3542 res = treebuilder_handle_comment(target, comment);
3543 Py_XDECREF(res);
3544 Py_DECREF(comment);
3545 } else if (self->handle_comment) {
3546 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3547 if (!comment)
3548 return;
3549
3550 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
3551 Py_XDECREF(res);
3552 Py_DECREF(comment);
3553 }
3554 }
3555
3556 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3557 expat_start_doctype_handler(XMLParserObject *self,
3558 const XML_Char *doctype_name,
3559 const XML_Char *sysid,
3560 const XML_Char *pubid,
3561 int has_internal_subset)
3562 {
3563 _Py_IDENTIFIER(doctype);
3564 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3565 PyObject *res;
3566
3567 if (PyErr_Occurred())
3568 return;
3569
3570 doctype_name_obj = makeuniversal(self, doctype_name);
3571 if (!doctype_name_obj)
3572 return;
3573
3574 if (sysid) {
3575 sysid_obj = makeuniversal(self, sysid);
3576 if (!sysid_obj) {
3577 Py_DECREF(doctype_name_obj);
3578 return;
3579 }
3580 } else {
3581 Py_INCREF(Py_None);
3582 sysid_obj = Py_None;
3583 }
3584
3585 if (pubid) {
3586 pubid_obj = makeuniversal(self, pubid);
3587 if (!pubid_obj) {
3588 Py_DECREF(doctype_name_obj);
3589 Py_DECREF(sysid_obj);
3590 return;
3591 }
3592 } else {
3593 Py_INCREF(Py_None);
3594 pubid_obj = Py_None;
3595 }
3596
3597 /* If the target has a handler for doctype, call it. */
3598 if (self->handle_doctype) {
3599 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3600 doctype_name_obj, pubid_obj,
3601 sysid_obj, NULL);
3602 Py_XDECREF(res);
3603 }
3604 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3605 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3606 "The doctype() method of XMLParser is ignored. "
3607 "Define doctype() method on the TreeBuilder target.",
3608 1);
3609 Py_DECREF(res);
3610 }
3611
3612 Py_DECREF(doctype_name_obj);
3613 Py_DECREF(pubid_obj);
3614 Py_DECREF(sysid_obj);
3615 }
3616
3617 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3618 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3619 const XML_Char* data_in)
3620 {
3621 PyObject* pi_target;
3622 PyObject* data;
3623 PyObject* res;
3624 PyObject* stack[2];
3625
3626 if (PyErr_Occurred())
3627 return;
3628
3629 if (TreeBuilder_CheckExact(self->target)) {
3630 /* shortcut */
3631 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3632
3633 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3634 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3635 if (!pi_target)
3636 goto error;
3637 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3638 if (!data)
3639 goto error;
3640 res = treebuilder_handle_pi(target, pi_target, data);
3641 Py_XDECREF(res);
3642 Py_DECREF(data);
3643 Py_DECREF(pi_target);
3644 }
3645 } else if (self->handle_pi) {
3646 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3647 if (!pi_target)
3648 goto error;
3649 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3650 if (!data)
3651 goto error;
3652
3653 stack[0] = pi_target;
3654 stack[1] = data;
3655 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3656 Py_XDECREF(res);
3657 Py_DECREF(data);
3658 Py_DECREF(pi_target);
3659 }
3660
3661 return;
3662
3663 error:
3664 Py_XDECREF(pi_target);
3665 return;
3666 }
3667
3668 /* -------------------------------------------------------------------- */
3669
3670 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3671 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3672 {
3673 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3674 if (self) {
3675 self->parser = NULL;
3676 self->target = self->entity = self->names = NULL;
3677 self->handle_start_ns = self->handle_end_ns = NULL;
3678 self->handle_start = self->handle_data = self->handle_end = NULL;
3679 self->handle_comment = self->handle_pi = self->handle_close = NULL;
3680 self->handle_doctype = NULL;
3681 }
3682 return (PyObject *)self;
3683 }
3684
3685 static int
ignore_attribute_error(PyObject * value)3686 ignore_attribute_error(PyObject *value)
3687 {
3688 if (value == NULL) {
3689 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3690 return -1;
3691 }
3692 PyErr_Clear();
3693 }
3694 return 0;
3695 }
3696
3697 /*[clinic input]
3698 _elementtree.XMLParser.__init__
3699
3700 *
3701 target: object = NULL
3702 encoding: str(accept={str, NoneType}) = None
3703
3704 [clinic start generated code]*/
3705
3706 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * target,const char * encoding)3707 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3708 const char *encoding)
3709 /*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
3710 {
3711 self->entity = PyDict_New();
3712 if (!self->entity)
3713 return -1;
3714
3715 self->names = PyDict_New();
3716 if (!self->names) {
3717 Py_CLEAR(self->entity);
3718 return -1;
3719 }
3720
3721 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3722 if (!self->parser) {
3723 Py_CLEAR(self->entity);
3724 Py_CLEAR(self->names);
3725 PyErr_NoMemory();
3726 return -1;
3727 }
3728 /* expat < 2.1.0 has no XML_SetHashSalt() */
3729 if (EXPAT(SetHashSalt) != NULL) {
3730 EXPAT(SetHashSalt)(self->parser,
3731 (unsigned long)_Py_HashSecret.expat.hashsalt);
3732 }
3733
3734 if (target) {
3735 Py_INCREF(target);
3736 } else {
3737 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3738 if (!target) {
3739 Py_CLEAR(self->entity);
3740 Py_CLEAR(self->names);
3741 return -1;
3742 }
3743 }
3744 self->target = target;
3745
3746 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3747 if (ignore_attribute_error(self->handle_start_ns)) {
3748 return -1;
3749 }
3750 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3751 if (ignore_attribute_error(self->handle_end_ns)) {
3752 return -1;
3753 }
3754 self->handle_start = PyObject_GetAttrString(target, "start");
3755 if (ignore_attribute_error(self->handle_start)) {
3756 return -1;
3757 }
3758 self->handle_data = PyObject_GetAttrString(target, "data");
3759 if (ignore_attribute_error(self->handle_data)) {
3760 return -1;
3761 }
3762 self->handle_end = PyObject_GetAttrString(target, "end");
3763 if (ignore_attribute_error(self->handle_end)) {
3764 return -1;
3765 }
3766 self->handle_comment = PyObject_GetAttrString(target, "comment");
3767 if (ignore_attribute_error(self->handle_comment)) {
3768 return -1;
3769 }
3770 self->handle_pi = PyObject_GetAttrString(target, "pi");
3771 if (ignore_attribute_error(self->handle_pi)) {
3772 return -1;
3773 }
3774 self->handle_close = PyObject_GetAttrString(target, "close");
3775 if (ignore_attribute_error(self->handle_close)) {
3776 return -1;
3777 }
3778 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3779 if (ignore_attribute_error(self->handle_doctype)) {
3780 return -1;
3781 }
3782
3783 /* configure parser */
3784 EXPAT(SetUserData)(self->parser, self);
3785 if (self->handle_start_ns || self->handle_end_ns)
3786 EXPAT(SetNamespaceDeclHandler)(
3787 self->parser,
3788 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3789 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3790 );
3791 EXPAT(SetElementHandler)(
3792 self->parser,
3793 (XML_StartElementHandler) expat_start_handler,
3794 (XML_EndElementHandler) expat_end_handler
3795 );
3796 EXPAT(SetDefaultHandlerExpand)(
3797 self->parser,
3798 (XML_DefaultHandler) expat_default_handler
3799 );
3800 EXPAT(SetCharacterDataHandler)(
3801 self->parser,
3802 (XML_CharacterDataHandler) expat_data_handler
3803 );
3804 if (self->handle_comment)
3805 EXPAT(SetCommentHandler)(
3806 self->parser,
3807 (XML_CommentHandler) expat_comment_handler
3808 );
3809 if (self->handle_pi)
3810 EXPAT(SetProcessingInstructionHandler)(
3811 self->parser,
3812 (XML_ProcessingInstructionHandler) expat_pi_handler
3813 );
3814 EXPAT(SetStartDoctypeDeclHandler)(
3815 self->parser,
3816 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3817 );
3818 EXPAT(SetUnknownEncodingHandler)(
3819 self->parser,
3820 EXPAT(DefaultUnknownEncodingHandler), NULL
3821 );
3822
3823 return 0;
3824 }
3825
3826 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3827 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3828 {
3829 Py_VISIT(self->handle_close);
3830 Py_VISIT(self->handle_pi);
3831 Py_VISIT(self->handle_comment);
3832 Py_VISIT(self->handle_end);
3833 Py_VISIT(self->handle_data);
3834 Py_VISIT(self->handle_start);
3835 Py_VISIT(self->handle_start_ns);
3836 Py_VISIT(self->handle_end_ns);
3837 Py_VISIT(self->handle_doctype);
3838
3839 Py_VISIT(self->target);
3840 Py_VISIT(self->entity);
3841 Py_VISIT(self->names);
3842
3843 return 0;
3844 }
3845
3846 static int
xmlparser_gc_clear(XMLParserObject * self)3847 xmlparser_gc_clear(XMLParserObject *self)
3848 {
3849 if (self->parser != NULL) {
3850 XML_Parser parser = self->parser;
3851 self->parser = NULL;
3852 EXPAT(ParserFree)(parser);
3853 }
3854
3855 Py_CLEAR(self->handle_close);
3856 Py_CLEAR(self->handle_pi);
3857 Py_CLEAR(self->handle_comment);
3858 Py_CLEAR(self->handle_end);
3859 Py_CLEAR(self->handle_data);
3860 Py_CLEAR(self->handle_start);
3861 Py_CLEAR(self->handle_start_ns);
3862 Py_CLEAR(self->handle_end_ns);
3863 Py_CLEAR(self->handle_doctype);
3864
3865 Py_CLEAR(self->target);
3866 Py_CLEAR(self->entity);
3867 Py_CLEAR(self->names);
3868
3869 return 0;
3870 }
3871
3872 static void
xmlparser_dealloc(XMLParserObject * self)3873 xmlparser_dealloc(XMLParserObject* self)
3874 {
3875 PyObject_GC_UnTrack(self);
3876 xmlparser_gc_clear(self);
3877 Py_TYPE(self)->tp_free((PyObject *)self);
3878 }
3879
3880 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3881 _check_xmlparser(XMLParserObject* self)
3882 {
3883 if (self->target == NULL) {
3884 PyErr_SetString(PyExc_ValueError,
3885 "XMLParser.__init__() wasn't called");
3886 return 0;
3887 }
3888 return 1;
3889 }
3890
3891 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3892 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3893 {
3894 int ok;
3895
3896 assert(!PyErr_Occurred());
3897 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3898
3899 if (PyErr_Occurred())
3900 return NULL;
3901
3902 if (!ok) {
3903 expat_set_error(
3904 EXPAT(GetErrorCode)(self->parser),
3905 EXPAT(GetErrorLineNumber)(self->parser),
3906 EXPAT(GetErrorColumnNumber)(self->parser),
3907 NULL
3908 );
3909 return NULL;
3910 }
3911
3912 Py_RETURN_NONE;
3913 }
3914
3915 /*[clinic input]
3916 _elementtree.XMLParser.close
3917
3918 [clinic start generated code]*/
3919
3920 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3921 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3922 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3923 {
3924 /* end feeding data to parser */
3925
3926 PyObject* res;
3927
3928 if (!_check_xmlparser(self)) {
3929 return NULL;
3930 }
3931 res = expat_parse(self, "", 0, 1);
3932 if (!res)
3933 return NULL;
3934
3935 if (TreeBuilder_CheckExact(self->target)) {
3936 Py_DECREF(res);
3937 return treebuilder_done((TreeBuilderObject*) self->target);
3938 }
3939 else if (self->handle_close) {
3940 Py_DECREF(res);
3941 return _PyObject_CallNoArg(self->handle_close);
3942 }
3943 else {
3944 return res;
3945 }
3946 }
3947
3948 /*[clinic input]
3949 _elementtree.XMLParser.feed
3950
3951 data: object
3952 /
3953
3954 [clinic start generated code]*/
3955
3956 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3957 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3958 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3959 {
3960 /* feed data to parser */
3961
3962 if (!_check_xmlparser(self)) {
3963 return NULL;
3964 }
3965 if (PyUnicode_Check(data)) {
3966 Py_ssize_t data_len;
3967 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3968 if (data_ptr == NULL)
3969 return NULL;
3970 if (data_len > INT_MAX) {
3971 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3972 return NULL;
3973 }
3974 /* Explicitly set UTF-8 encoding. Return code ignored. */
3975 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3976 return expat_parse(self, data_ptr, (int)data_len, 0);
3977 }
3978 else {
3979 Py_buffer view;
3980 PyObject *res;
3981 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3982 return NULL;
3983 if (view.len > INT_MAX) {
3984 PyBuffer_Release(&view);
3985 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3986 return NULL;
3987 }
3988 res = expat_parse(self, view.buf, (int)view.len, 0);
3989 PyBuffer_Release(&view);
3990 return res;
3991 }
3992 }
3993
3994 /*[clinic input]
3995 _elementtree.XMLParser._parse_whole
3996
3997 file: object
3998 /
3999
4000 [clinic start generated code]*/
4001
4002 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)4003 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
4004 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
4005 {
4006 /* (internal) parse the whole input, until end of stream */
4007 PyObject* reader;
4008 PyObject* buffer;
4009 PyObject* temp;
4010 PyObject* res;
4011
4012 if (!_check_xmlparser(self)) {
4013 return NULL;
4014 }
4015 reader = PyObject_GetAttrString(file, "read");
4016 if (!reader)
4017 return NULL;
4018
4019 /* read from open file object */
4020 for (;;) {
4021
4022 buffer = PyObject_CallFunction(reader, "i", 64*1024);
4023
4024 if (!buffer) {
4025 /* read failed (e.g. due to KeyboardInterrupt) */
4026 Py_DECREF(reader);
4027 return NULL;
4028 }
4029
4030 if (PyUnicode_CheckExact(buffer)) {
4031 /* A unicode object is encoded into bytes using UTF-8 */
4032 if (PyUnicode_GET_LENGTH(buffer) == 0) {
4033 Py_DECREF(buffer);
4034 break;
4035 }
4036 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
4037 Py_DECREF(buffer);
4038 if (!temp) {
4039 /* Propagate exception from PyUnicode_AsEncodedString */
4040 Py_DECREF(reader);
4041 return NULL;
4042 }
4043 buffer = temp;
4044 }
4045 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
4046 Py_DECREF(buffer);
4047 break;
4048 }
4049
4050 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
4051 Py_DECREF(buffer);
4052 Py_DECREF(reader);
4053 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
4054 return NULL;
4055 }
4056 res = expat_parse(
4057 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
4058 );
4059
4060 Py_DECREF(buffer);
4061
4062 if (!res) {
4063 Py_DECREF(reader);
4064 return NULL;
4065 }
4066 Py_DECREF(res);
4067
4068 }
4069
4070 Py_DECREF(reader);
4071
4072 res = expat_parse(self, "", 0, 1);
4073
4074 if (res && TreeBuilder_CheckExact(self->target)) {
4075 Py_DECREF(res);
4076 return treebuilder_done((TreeBuilderObject*) self->target);
4077 }
4078
4079 return res;
4080 }
4081
4082 /*[clinic input]
4083 _elementtree.XMLParser._setevents
4084
4085 events_queue: object
4086 events_to_report: object = None
4087 /
4088
4089 [clinic start generated code]*/
4090
4091 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4092 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4093 PyObject *events_queue,
4094 PyObject *events_to_report)
4095 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4096 {
4097 /* activate element event reporting */
4098 Py_ssize_t i;
4099 TreeBuilderObject *target;
4100 PyObject *events_append, *events_seq;
4101
4102 if (!_check_xmlparser(self)) {
4103 return NULL;
4104 }
4105 if (!TreeBuilder_CheckExact(self->target)) {
4106 PyErr_SetString(
4107 PyExc_TypeError,
4108 "event handling only supported for ElementTree.TreeBuilder "
4109 "targets"
4110 );
4111 return NULL;
4112 }
4113
4114 target = (TreeBuilderObject*) self->target;
4115
4116 events_append = PyObject_GetAttrString(events_queue, "append");
4117 if (events_append == NULL)
4118 return NULL;
4119 Py_XSETREF(target->events_append, events_append);
4120
4121 /* clear out existing events */
4122 Py_CLEAR(target->start_event_obj);
4123 Py_CLEAR(target->end_event_obj);
4124 Py_CLEAR(target->start_ns_event_obj);
4125 Py_CLEAR(target->end_ns_event_obj);
4126 Py_CLEAR(target->comment_event_obj);
4127 Py_CLEAR(target->pi_event_obj);
4128
4129 if (events_to_report == Py_None) {
4130 /* default is "end" only */
4131 target->end_event_obj = PyUnicode_FromString("end");
4132 Py_RETURN_NONE;
4133 }
4134
4135 if (!(events_seq = PySequence_Fast(events_to_report,
4136 "events must be a sequence"))) {
4137 return NULL;
4138 }
4139
4140 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4141 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4142 const char *event_name = NULL;
4143 if (PyUnicode_Check(event_name_obj)) {
4144 event_name = PyUnicode_AsUTF8(event_name_obj);
4145 } else if (PyBytes_Check(event_name_obj)) {
4146 event_name = PyBytes_AS_STRING(event_name_obj);
4147 }
4148 if (event_name == NULL) {
4149 Py_DECREF(events_seq);
4150 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4151 return NULL;
4152 }
4153
4154 Py_INCREF(event_name_obj);
4155 if (strcmp(event_name, "start") == 0) {
4156 Py_XSETREF(target->start_event_obj, event_name_obj);
4157 } else if (strcmp(event_name, "end") == 0) {
4158 Py_XSETREF(target->end_event_obj, event_name_obj);
4159 } else if (strcmp(event_name, "start-ns") == 0) {
4160 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4161 EXPAT(SetNamespaceDeclHandler)(
4162 self->parser,
4163 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4164 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4165 );
4166 } else if (strcmp(event_name, "end-ns") == 0) {
4167 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4168 EXPAT(SetNamespaceDeclHandler)(
4169 self->parser,
4170 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4171 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4172 );
4173 } else if (strcmp(event_name, "comment") == 0) {
4174 Py_XSETREF(target->comment_event_obj, event_name_obj);
4175 EXPAT(SetCommentHandler)(
4176 self->parser,
4177 (XML_CommentHandler) expat_comment_handler
4178 );
4179 } else if (strcmp(event_name, "pi") == 0) {
4180 Py_XSETREF(target->pi_event_obj, event_name_obj);
4181 EXPAT(SetProcessingInstructionHandler)(
4182 self->parser,
4183 (XML_ProcessingInstructionHandler) expat_pi_handler
4184 );
4185 } else {
4186 Py_DECREF(event_name_obj);
4187 Py_DECREF(events_seq);
4188 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4189 return NULL;
4190 }
4191 }
4192
4193 Py_DECREF(events_seq);
4194 Py_RETURN_NONE;
4195 }
4196
/* Read-only object members of XMLParser, exposed directly from the
   "entity" and "target" fields of XMLParserObject.  The {NULL} entry
   terminates the table. */
static PyMemberDef xmlparser_members[] = {
    {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
    {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
    {NULL}
};
4202
4203 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4204 xmlparser_version_getter(XMLParserObject *self, void *closure)
4205 {
4206 return PyUnicode_FromFormat(
4207 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4208 XML_MINOR_VERSION, XML_MICRO_VERSION);
4209 }
4210
/* Computed attributes of XMLParser: "version" has a getter and no
   setter.  The {NULL} entry terminates the table. */
static PyGetSetDef xmlparser_getsetlist[] = {
    {"version", (getter)xmlparser_version_getter, NULL, NULL},
    {NULL},
};
4215
4216 #include "clinic/_elementtree.c.h"
4217
/* Method table for Element (installed as Element_Type.tp_methods).
   Each entry is a *_METHODDEF macro; presumably these are the
   Argument Clinic definitions from clinic/_elementtree.c.h - confirm.
   The {NULL, NULL} entry terminates the table. */
static PyMethodDef element_methods[] = {

    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF

    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF

    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF

    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF

    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF

    _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
    _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF

    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF

    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF

    {NULL, NULL}
};
4254
4255 static PyMappingMethods element_as_mapping = {
4256 (lenfunc) element_length,
4257 (binaryfunc) element_subscr,
4258 (objobjargproc) element_ass_subscr,
4259 };
4260
/* Computed attributes of Element (installed as Element_Type.tp_getset).
   Each entry gives the attribute name, its getter, its setter and the
   docstring.  The {NULL} entry terminates the table. */
static PyGetSetDef element_getsetlist[] = {
    {"tag",
        (getter)element_tag_getter,
        (setter)element_tag_setter,
        "A string identifying what kind of data this element represents"},
    {"text",
        (getter)element_text_getter,
        (setter)element_text_setter,
        "A string of text directly after the start tag, or None"},
    {"tail",
        (getter)element_tail_getter,
        (setter)element_tail_setter,
        "A string of text directly after the end tag, or None"},
    {"attrib",
        (getter)element_attrib_getter,
        (setter)element_attrib_setter,
        "A dictionary containing the element's attributes"},
    {NULL},
};
4280
4281 static PyTypeObject Element_Type = {
4282 PyVarObject_HEAD_INIT(NULL, 0)
4283 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4284 /* methods */
4285 (destructor)element_dealloc, /* tp_dealloc */
4286 0, /* tp_vectorcall_offset */
4287 0, /* tp_getattr */
4288 0, /* tp_setattr */
4289 0, /* tp_as_async */
4290 (reprfunc)element_repr, /* tp_repr */
4291 0, /* tp_as_number */
4292 &element_as_sequence, /* tp_as_sequence */
4293 &element_as_mapping, /* tp_as_mapping */
4294 0, /* tp_hash */
4295 0, /* tp_call */
4296 0, /* tp_str */
4297 PyObject_GenericGetAttr, /* tp_getattro */
4298 0, /* tp_setattro */
4299 0, /* tp_as_buffer */
4300 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4301 /* tp_flags */
4302 0, /* tp_doc */
4303 (traverseproc)element_gc_traverse, /* tp_traverse */
4304 (inquiry)element_gc_clear, /* tp_clear */
4305 0, /* tp_richcompare */
4306 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4307 0, /* tp_iter */
4308 0, /* tp_iternext */
4309 element_methods, /* tp_methods */
4310 0, /* tp_members */
4311 element_getsetlist, /* tp_getset */
4312 0, /* tp_base */
4313 0, /* tp_dict */
4314 0, /* tp_descr_get */
4315 0, /* tp_descr_set */
4316 0, /* tp_dictoffset */
4317 (initproc)element_init, /* tp_init */
4318 PyType_GenericAlloc, /* tp_alloc */
4319 element_new, /* tp_new */
4320 0, /* tp_free */
4321 };
4322
/* Method table for TreeBuilder (installed as TreeBuilder_Type.tp_methods):
   data, start, end, comment, pi and close.  Each entry is a
   *_METHODDEF macro; presumably from clinic/_elementtree.c.h - confirm.
   The {NULL, NULL} entry terminates the table. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    {NULL, NULL}
};
4332
4333 static PyTypeObject TreeBuilder_Type = {
4334 PyVarObject_HEAD_INIT(NULL, 0)
4335 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4336 /* methods */
4337 (destructor)treebuilder_dealloc, /* tp_dealloc */
4338 0, /* tp_vectorcall_offset */
4339 0, /* tp_getattr */
4340 0, /* tp_setattr */
4341 0, /* tp_as_async */
4342 0, /* tp_repr */
4343 0, /* tp_as_number */
4344 0, /* tp_as_sequence */
4345 0, /* tp_as_mapping */
4346 0, /* tp_hash */
4347 0, /* tp_call */
4348 0, /* tp_str */
4349 0, /* tp_getattro */
4350 0, /* tp_setattro */
4351 0, /* tp_as_buffer */
4352 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4353 /* tp_flags */
4354 0, /* tp_doc */
4355 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4356 (inquiry)treebuilder_gc_clear, /* tp_clear */
4357 0, /* tp_richcompare */
4358 0, /* tp_weaklistoffset */
4359 0, /* tp_iter */
4360 0, /* tp_iternext */
4361 treebuilder_methods, /* tp_methods */
4362 0, /* tp_members */
4363 0, /* tp_getset */
4364 0, /* tp_base */
4365 0, /* tp_dict */
4366 0, /* tp_descr_get */
4367 0, /* tp_descr_set */
4368 0, /* tp_dictoffset */
4369 _elementtree_TreeBuilder___init__, /* tp_init */
4370 PyType_GenericAlloc, /* tp_alloc */
4371 treebuilder_new, /* tp_new */
4372 0, /* tp_free */
4373 };
4374
/* Method table for XMLParser (installed as XMLParser_Type.tp_methods):
   feed, close, _parse_whole and _setevents.  Each entry is a
   *_METHODDEF macro; presumably from clinic/_elementtree.c.h - confirm.
   The {NULL, NULL} entry terminates the table. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    {NULL, NULL}
};
4382
4383 static PyTypeObject XMLParser_Type = {
4384 PyVarObject_HEAD_INIT(NULL, 0)
4385 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4386 /* methods */
4387 (destructor)xmlparser_dealloc, /* tp_dealloc */
4388 0, /* tp_vectorcall_offset */
4389 0, /* tp_getattr */
4390 0, /* tp_setattr */
4391 0, /* tp_as_async */
4392 0, /* tp_repr */
4393 0, /* tp_as_number */
4394 0, /* tp_as_sequence */
4395 0, /* tp_as_mapping */
4396 0, /* tp_hash */
4397 0, /* tp_call */
4398 0, /* tp_str */
4399 0, /* tp_getattro */
4400 0, /* tp_setattro */
4401 0, /* tp_as_buffer */
4402 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4403 /* tp_flags */
4404 0, /* tp_doc */
4405 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4406 (inquiry)xmlparser_gc_clear, /* tp_clear */
4407 0, /* tp_richcompare */
4408 0, /* tp_weaklistoffset */
4409 0, /* tp_iter */
4410 0, /* tp_iternext */
4411 xmlparser_methods, /* tp_methods */
4412 xmlparser_members, /* tp_members */
4413 xmlparser_getsetlist, /* tp_getset */
4414 0, /* tp_base */
4415 0, /* tp_dict */
4416 0, /* tp_descr_get */
4417 0, /* tp_descr_set */
4418 0, /* tp_dictoffset */
4419 _elementtree_XMLParser___init__, /* tp_init */
4420 PyType_GenericAlloc, /* tp_alloc */
4421 xmlparser_new, /* tp_new */
4422 0, /* tp_free */
4423 };
4424
4425 /* ==================================================================== */
4426 /* python module interface */
4427
/* Module-level functions of _elementtree: SubElement (takes positional
   and keyword arguments) and the entry supplied by the
   _ELEMENTTREE__SET_FACTORIES_METHODDEF macro.  The {NULL, NULL} entry
   terminates the table. */
static PyMethodDef _functions[] = {
    {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
    _ELEMENTTREE__SET_FACTORIES_METHODDEF
    {NULL, NULL}
};
4433
4434
4435 static struct PyModuleDef elementtreemodule = {
4436 PyModuleDef_HEAD_INIT,
4437 "_elementtree",
4438 NULL,
4439 sizeof(elementtreestate),
4440 _functions,
4441 NULL,
4442 elementtree_traverse,
4443 elementtree_clear,
4444 elementtree_free
4445 };
4446
4447 PyMODINIT_FUNC
PyInit__elementtree(void)4448 PyInit__elementtree(void)
4449 {
4450 PyObject *m, *temp;
4451 elementtreestate *st;
4452
4453 m = PyState_FindModule(&elementtreemodule);
4454 if (m) {
4455 Py_INCREF(m);
4456 return m;
4457 }
4458
4459 /* Initialize object types */
4460 if (PyType_Ready(&ElementIter_Type) < 0)
4461 return NULL;
4462 if (PyType_Ready(&TreeBuilder_Type) < 0)
4463 return NULL;
4464 if (PyType_Ready(&Element_Type) < 0)
4465 return NULL;
4466 if (PyType_Ready(&XMLParser_Type) < 0)
4467 return NULL;
4468
4469 m = PyModule_Create(&elementtreemodule);
4470 if (!m)
4471 return NULL;
4472 st = ET_STATE(m);
4473
4474 if (!(temp = PyImport_ImportModule("copy")))
4475 return NULL;
4476 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4477 Py_XDECREF(temp);
4478
4479 if (st->deepcopy_obj == NULL) {
4480 return NULL;
4481 }
4482
4483 assert(!PyErr_Occurred());
4484 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4485 return NULL;
4486
4487 /* link against pyexpat */
4488 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4489 if (expat_capi) {
4490 /* check that it's usable */
4491 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4492 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4493 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4494 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4495 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4496 PyErr_SetString(PyExc_ImportError,
4497 "pyexpat version is incompatible");
4498 return NULL;
4499 }
4500 } else {
4501 return NULL;
4502 }
4503
4504 st->parseerror_obj = PyErr_NewException(
4505 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4506 );
4507 Py_INCREF(st->parseerror_obj);
4508 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
4509
4510 Py_INCREF((PyObject *)&Element_Type);
4511 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4512
4513 Py_INCREF((PyObject *)&TreeBuilder_Type);
4514 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4515
4516 Py_INCREF((PyObject *)&XMLParser_Type);
4517 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
4518
4519 return m;
4520 }
4521