1 /*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See https://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * info@pythonware.com
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14 #define PY_SSIZE_T_CLEAN
15
16 #include "Python.h"
17 #include "structmember.h" // PyMemberDef
18
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21
22 /* An element can hold this many children without extra memory
23 allocations. */
24 #define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31 /* Also note that pymalloc always allocates blocks in multiples of
32 eight bytes. For the current C version of ElementTree, this means
33 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36 /* -------------------------------------------------------------------- */
37
/* Optional allocation tracing. With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in 'memory' and print it together with
   a short label. In the default branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks */
/* LOCAL(type) introduces a file-local helper returning 'type'. Under MSVC
   the helper is additionally declared __inline and __fastcall; with any
   other compiler it is a plain static function. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers. note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ. see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       - the flag, i.e. the lowest bit of the pointer value
   JOIN_SET(p, flag) - p with its old flag removed and 'flag' or-ed in;
                       the result has type void*
   JOIN_OBJ(p)       - p with the lowest bit cleared, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
63
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71 }
72
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78 if (*p) {
79 _set_joined_ptr(p, NULL);
80 }
81 }
82
/* Types defined by this extension. These are forward declarations only;
   the initialised type objects are not part of this section of the file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
   Every field is an owned reference (or NULL); elementtree_clear() releases
   them and elementtree_traverse() reports them to the garbage collector. */
typedef struct {
    PyObject *parseerror_obj;
    /* callable used by deepcopy() for objects without a fast path; it is
       invoked with (object, memo) */
    PyObject *deepcopy_obj;
    PyObject *elementpath_obj;
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;

/* Forward declaration; needed by ET_STATE_GLOBAL. */
static struct PyModuleDef elementtreemodule;
100
101 /* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
104 static inline elementtreestate*
get_elementtree_state(PyObject * module)105 get_elementtree_state(PyObject *module)
106 {
107 void *state = PyModule_GetState(module);
108 assert(state != NULL);
109 return (elementtreestate *)state;
110 }
111
/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 *
 * Note that the result of PyState_FindModule() is handed to
 * PyModule_GetState() without a NULL check.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
117
118 static int
elementtree_clear(PyObject * m)119 elementtree_clear(PyObject *m)
120 {
121 elementtreestate *st = get_elementtree_state(m);
122 Py_CLEAR(st->parseerror_obj);
123 Py_CLEAR(st->deepcopy_obj);
124 Py_CLEAR(st->elementpath_obj);
125 Py_CLEAR(st->comment_factory);
126 Py_CLEAR(st->pi_factory);
127 return 0;
128 }
129
130 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)131 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132 {
133 elementtreestate *st = get_elementtree_state(m);
134 Py_VISIT(st->parseerror_obj);
135 Py_VISIT(st->deepcopy_obj);
136 Py_VISIT(st->elementpath_obj);
137 Py_VISIT(st->comment_factory);
138 Py_VISIT(st->pi_factory);
139 return 0;
140 }
141
142 static void
elementtree_free(void * m)143 elementtree_free(void *m)
144 {
145 elementtree_clear((PyObject *)m);
146 }
147
148 /* helpers */
149
150 LOCAL(PyObject*)
list_join(PyObject * list)151 list_join(PyObject* list)
152 {
153 /* join list elements */
154 PyObject* joiner;
155 PyObject* result;
156
157 joiner = PyUnicode_FromStringAndSize("", 0);
158 if (!joiner)
159 return NULL;
160 result = PyUnicode_Join(joiner, list);
161 Py_DECREF(joiner);
162 return result;
163 }
164
165 /* Is the given object an empty dictionary?
166 */
167 static int
is_empty_dict(PyObject * obj)168 is_empty_dict(PyObject *obj)
169 {
170 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
171 }
172
173
174 /* -------------------------------------------------------------------- */
175 /* the Element type */
176
/* Optional part of an element: its attributes and children. An element
   only gets one once it needs attributes or child slots (see create_extra
   and element_resize). */
typedef struct {

    /* attributes (a dictionary object), or NULL if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used while the element has at most STATIC_CHILDREN
       child slots allocated */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
192
/* The Element object itself. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child. note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer. the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent. note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until the element needs them */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
216
217
/* Element_CheckExact: the type of op is exactly Element_Type.
   Element_Check: op is an instance of Element_Type or of a subtype. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
220
221
222 /* -------------------------------------------------------------------- */
223 /* Element constructors and destructor */
224
225 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)226 create_extra(ElementObject* self, PyObject* attrib)
227 {
228 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
229 if (!self->extra) {
230 PyErr_NoMemory();
231 return -1;
232 }
233
234 Py_XINCREF(attrib);
235 self->extra->attrib = attrib;
236
237 self->extra->length = 0;
238 self->extra->allocated = STATIC_CHILDREN;
239 self->extra->children = self->extra->_children;
240
241 return 0;
242 }
243
244 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)245 dealloc_extra(ElementObjectExtra *extra)
246 {
247 Py_ssize_t i;
248
249 if (!extra)
250 return;
251
252 Py_XDECREF(extra->attrib);
253
254 for (i = 0; i < extra->length; i++)
255 Py_DECREF(extra->children[i]);
256
257 if (extra->children != extra->_children)
258 PyObject_Free(extra->children);
259
260 PyObject_Free(extra);
261 }
262
263 LOCAL(void)
clear_extra(ElementObject * self)264 clear_extra(ElementObject* self)
265 {
266 ElementObjectExtra *myextra;
267
268 if (!self->extra)
269 return;
270
271 /* Avoid DECREFs calling into this code again (cycles, etc.)
272 */
273 myextra = self->extra;
274 self->extra = NULL;
275
276 dealloc_extra(myextra);
277 }
278
279 /* Convenience internal function to create new Element objects with the given
280 * tag and attributes.
281 */
282 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)283 create_new_element(PyObject* tag, PyObject* attrib)
284 {
285 ElementObject* self;
286
287 self = PyObject_GC_New(ElementObject, &Element_Type);
288 if (self == NULL)
289 return NULL;
290 self->extra = NULL;
291
292 Py_INCREF(tag);
293 self->tag = tag;
294
295 Py_INCREF(Py_None);
296 self->text = Py_None;
297
298 Py_INCREF(Py_None);
299 self->tail = Py_None;
300
301 self->weakreflist = NULL;
302
303 ALLOC(sizeof(ElementObject), "create element");
304 PyObject_GC_Track(self);
305
306 if (attrib != NULL && !is_empty_dict(attrib)) {
307 if (create_extra(self, attrib) < 0) {
308 Py_DECREF(self);
309 return NULL;
310 }
311 }
312
313 return (PyObject*) self;
314 }
315
316 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)317 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318 {
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
331 e->weakreflist = NULL;
332 }
333 return (PyObject *)e;
334 }
335
336 /* Helper function for extracting the attrib dictionary from a keywords dict.
337 * This is required by some constructors/functions in this module that can
338 * either accept attrib as a keyword argument or all attributes splashed
339 * directly into *kwds.
340 *
341 * Return a dictionary with the content of kwds merged into the content of
342 * attrib. If there is no attrib keyword, return a copy of kwds.
343 */
344 static PyObject*
get_attrib_from_keywords(PyObject * kwds)345 get_attrib_from_keywords(PyObject *kwds)
346 {
347 PyObject *attrib_str = PyUnicode_FromString("attrib");
348 if (attrib_str == NULL) {
349 return NULL;
350 }
351 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
352
353 if (attrib) {
354 /* If attrib was found in kwds, copy its value and remove it from
355 * kwds
356 */
357 if (!PyDict_Check(attrib)) {
358 Py_DECREF(attrib_str);
359 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360 Py_TYPE(attrib)->tp_name);
361 return NULL;
362 }
363 attrib = PyDict_Copy(attrib);
364 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365 Py_DECREF(attrib);
366 attrib = NULL;
367 }
368 }
369 else if (!PyErr_Occurred()) {
370 attrib = PyDict_New();
371 }
372
373 Py_DECREF(attrib_str);
374
375 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376 Py_DECREF(attrib);
377 return NULL;
378 }
379 return attrib;
380 }
381
382 /*[clinic input]
383 module _elementtree
384 class _elementtree.Element "ElementObject *" "&Element_Type"
385 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387 [clinic start generated code]*/
388 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389
390 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)391 element_init(PyObject *self, PyObject *args, PyObject *kwds)
392 {
393 PyObject *tag;
394 PyObject *attrib = NULL;
395 ElementObject *self_elem;
396
397 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398 return -1;
399
400 if (attrib) {
401 /* attrib passed as positional arg */
402 attrib = PyDict_Copy(attrib);
403 if (!attrib)
404 return -1;
405 if (kwds) {
406 if (PyDict_Update(attrib, kwds) < 0) {
407 Py_DECREF(attrib);
408 return -1;
409 }
410 }
411 } else if (kwds) {
412 /* have keywords args */
413 attrib = get_attrib_from_keywords(kwds);
414 if (!attrib)
415 return -1;
416 }
417
418 self_elem = (ElementObject *)self;
419
420 if (attrib != NULL && !is_empty_dict(attrib)) {
421 if (create_extra(self_elem, attrib) < 0) {
422 Py_DECREF(attrib);
423 return -1;
424 }
425 }
426
427 /* We own a reference to attrib here and it's no longer needed. */
428 Py_XDECREF(attrib);
429
430 /* Replace the objects already pointed to by tag, text and tail. */
431 Py_INCREF(tag);
432 Py_XSETREF(self_elem->tag, tag);
433
434 Py_INCREF(Py_None);
435 _set_joined_ptr(&self_elem->text, Py_None);
436
437 Py_INCREF(Py_None);
438 _set_joined_ptr(&self_elem->tail, Py_None);
439
440 return 0;
441 }
442
443 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)444 element_resize(ElementObject* self, Py_ssize_t extra)
445 {
446 Py_ssize_t size;
447 PyObject* *children;
448
449 assert(extra >= 0);
450 /* make sure self->children can hold the given number of extra
451 elements. set an exception and return -1 if allocation failed */
452
453 if (!self->extra) {
454 if (create_extra(self, NULL) < 0)
455 return -1;
456 }
457
458 size = self->extra->length + extra; /* never overflows */
459
460 if (size > self->extra->allocated) {
461 /* use Python 2.4's list growth strategy */
462 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
463 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
464 * which needs at least 4 bytes.
465 * Although it's a false alarm always assume at least one child to
466 * be safe.
467 */
468 size = size ? size : 1;
469 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470 goto nomemory;
471 if (self->extra->children != self->extra->_children) {
472 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
473 * "children", which needs at least 4 bytes. Although it's a
474 * false alarm always assume at least one child to be safe.
475 */
476 children = PyObject_Realloc(self->extra->children,
477 size * sizeof(PyObject*));
478 if (!children)
479 goto nomemory;
480 } else {
481 children = PyObject_Malloc(size * sizeof(PyObject*));
482 if (!children)
483 goto nomemory;
484 /* copy existing children from static area to malloc buffer */
485 memcpy(children, self->extra->children,
486 self->extra->length * sizeof(PyObject*));
487 }
488 self->extra->children = children;
489 self->extra->allocated = size;
490 }
491
492 return 0;
493
494 nomemory:
495 PyErr_NoMemory();
496 return -1;
497 }
498
499 LOCAL(void)
raise_type_error(PyObject * element)500 raise_type_error(PyObject *element)
501 {
502 PyErr_Format(PyExc_TypeError,
503 "expected an Element, not \"%.200s\"",
504 Py_TYPE(element)->tp_name);
505 }
506
507 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)508 element_add_subelement(ElementObject* self, PyObject* element)
509 {
510 /* add a child element to a parent */
511
512 if (!Element_Check(element)) {
513 raise_type_error(element);
514 return -1;
515 }
516
517 if (element_resize(self, 1) < 0)
518 return -1;
519
520 Py_INCREF(element);
521 self->extra->children[self->extra->length] = element;
522
523 self->extra->length++;
524
525 return 0;
526 }
527
528 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)529 element_get_attrib(ElementObject* self)
530 {
531 /* return borrowed reference to attrib dictionary */
532 /* note: this function assumes that the extra section exists */
533
534 PyObject* res = self->extra->attrib;
535
536 if (!res) {
537 /* create missing dictionary */
538 res = self->extra->attrib = PyDict_New();
539 }
540
541 return res;
542 }
543
544 LOCAL(PyObject*)
element_get_text(ElementObject * self)545 element_get_text(ElementObject* self)
546 {
547 /* return borrowed reference to text attribute */
548
549 PyObject *res = self->text;
550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
554 PyObject *tmp = list_join(res);
555 if (!tmp)
556 return NULL;
557 self->text = tmp;
558 Py_DECREF(res);
559 res = tmp;
560 }
561 }
562
563 return res;
564 }
565
566 LOCAL(PyObject*)
element_get_tail(ElementObject * self)567 element_get_tail(ElementObject* self)
568 {
569 /* return borrowed reference to text attribute */
570
571 PyObject *res = self->tail;
572
573 if (JOIN_GET(res)) {
574 res = JOIN_OBJ(res);
575 if (PyList_CheckExact(res)) {
576 PyObject *tmp = list_join(res);
577 if (!tmp)
578 return NULL;
579 self->tail = tmp;
580 Py_DECREF(res);
581 res = tmp;
582 }
583 }
584
585 return res;
586 }
587
588 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)589 subelement(PyObject *self, PyObject *args, PyObject *kwds)
590 {
591 PyObject* elem;
592
593 ElementObject* parent;
594 PyObject* tag;
595 PyObject* attrib = NULL;
596 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597 &Element_Type, &parent, &tag,
598 &PyDict_Type, &attrib)) {
599 return NULL;
600 }
601
602 if (attrib) {
603 /* attrib passed as positional arg */
604 attrib = PyDict_Copy(attrib);
605 if (!attrib)
606 return NULL;
607 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608 Py_DECREF(attrib);
609 return NULL;
610 }
611 } else if (kwds) {
612 /* have keyword args */
613 attrib = get_attrib_from_keywords(kwds);
614 if (!attrib)
615 return NULL;
616 } else {
617 /* no attrib arg, no kwds, so no attribute */
618 }
619
620 elem = create_new_element(tag, attrib);
621 Py_XDECREF(attrib);
622 if (elem == NULL)
623 return NULL;
624
625 if (element_add_subelement(parent, elem) < 0) {
626 Py_DECREF(elem);
627 return NULL;
628 }
629
630 return elem;
631 }
632
633 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)634 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635 {
636 Py_VISIT(self->tag);
637 Py_VISIT(JOIN_OBJ(self->text));
638 Py_VISIT(JOIN_OBJ(self->tail));
639
640 if (self->extra) {
641 Py_ssize_t i;
642 Py_VISIT(self->extra->attrib);
643
644 for (i = 0; i < self->extra->length; ++i)
645 Py_VISIT(self->extra->children[i]);
646 }
647 return 0;
648 }
649
650 static int
element_gc_clear(ElementObject * self)651 element_gc_clear(ElementObject *self)
652 {
653 Py_CLEAR(self->tag);
654 _clear_joined_ptr(&self->text);
655 _clear_joined_ptr(&self->tail);
656
657 /* After dropping all references from extra, it's no longer valid anyway,
658 * so fully deallocate it.
659 */
660 clear_extra(self);
661 return 0;
662 }
663
664 static void
element_dealloc(ElementObject * self)665 element_dealloc(ElementObject* self)
666 {
667 /* bpo-31095: UnTrack is needed before calling any callbacks */
668 PyObject_GC_UnTrack(self);
669 Py_TRASHCAN_BEGIN(self, element_dealloc)
670
671 if (self->weakreflist != NULL)
672 PyObject_ClearWeakRefs((PyObject *) self);
673
674 /* element_gc_clear clears all references and deallocates extra
675 */
676 element_gc_clear(self);
677
678 RELEASE(sizeof(ElementObject), "destroy element");
679 Py_TYPE(self)->tp_free((PyObject *)self);
680 Py_TRASHCAN_END
681 }
682
683 /* -------------------------------------------------------------------- */
684
685 /*[clinic input]
686 _elementtree.Element.append
687
688 subelement: object(subclass_of='&Element_Type')
689 /
690
691 [clinic start generated code]*/
692
693 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)694 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696 {
697 if (element_add_subelement(self, subelement) < 0)
698 return NULL;
699
700 Py_RETURN_NONE;
701 }
702
703 /*[clinic input]
704 _elementtree.Element.clear
705
706 [clinic start generated code]*/
707
708 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)709 _elementtree_Element_clear_impl(ElementObject *self)
710 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711 {
712 clear_extra(self);
713
714 Py_INCREF(Py_None);
715 _set_joined_ptr(&self->text, Py_None);
716
717 Py_INCREF(Py_None);
718 _set_joined_ptr(&self->tail, Py_None);
719
720 Py_RETURN_NONE;
721 }
722
723 /*[clinic input]
724 _elementtree.Element.__copy__
725
726 [clinic start generated code]*/
727
728 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)729 _elementtree_Element___copy___impl(ElementObject *self)
730 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
731 {
732 Py_ssize_t i;
733 ElementObject* element;
734
735 element = (ElementObject*) create_new_element(
736 self->tag, self->extra ? self->extra->attrib : NULL);
737 if (!element)
738 return NULL;
739
740 Py_INCREF(JOIN_OBJ(self->text));
741 _set_joined_ptr(&element->text, self->text);
742
743 Py_INCREF(JOIN_OBJ(self->tail));
744 _set_joined_ptr(&element->tail, self->tail);
745
746 assert(!element->extra || !element->extra->length);
747 if (self->extra) {
748 if (element_resize(element, self->extra->length) < 0) {
749 Py_DECREF(element);
750 return NULL;
751 }
752
753 for (i = 0; i < self->extra->length; i++) {
754 Py_INCREF(self->extra->children[i]);
755 element->extra->children[i] = self->extra->children[i];
756 }
757
758 assert(!element->extra->length);
759 element->extra->length = self->extra->length;
760 }
761
762 return (PyObject*) element;
763 }
764
/* Helper for a deep copy. Forward declaration: the definition follows
   Element.__deepcopy__, which calls it. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767
768 /*[clinic input]
769 _elementtree.Element.__deepcopy__
770
771 memo: object(subclass_of="&PyDict_Type")
772 /
773
774 [clinic start generated code]*/
775
776 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)777 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
779 {
780 Py_ssize_t i;
781 ElementObject* element;
782 PyObject* tag;
783 PyObject* attrib;
784 PyObject* text;
785 PyObject* tail;
786 PyObject* id;
787
788 tag = deepcopy(self->tag, memo);
789 if (!tag)
790 return NULL;
791
792 if (self->extra && self->extra->attrib) {
793 attrib = deepcopy(self->extra->attrib, memo);
794 if (!attrib) {
795 Py_DECREF(tag);
796 return NULL;
797 }
798 } else {
799 attrib = NULL;
800 }
801
802 element = (ElementObject*) create_new_element(tag, attrib);
803
804 Py_DECREF(tag);
805 Py_XDECREF(attrib);
806
807 if (!element)
808 return NULL;
809
810 text = deepcopy(JOIN_OBJ(self->text), memo);
811 if (!text)
812 goto error;
813 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
814
815 tail = deepcopy(JOIN_OBJ(self->tail), memo);
816 if (!tail)
817 goto error;
818 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
819
820 assert(!element->extra || !element->extra->length);
821 if (self->extra) {
822 if (element_resize(element, self->extra->length) < 0)
823 goto error;
824
825 for (i = 0; i < self->extra->length; i++) {
826 PyObject* child = deepcopy(self->extra->children[i], memo);
827 if (!child || !Element_Check(child)) {
828 if (child) {
829 raise_type_error(child);
830 Py_DECREF(child);
831 }
832 element->extra->length = i;
833 goto error;
834 }
835 element->extra->children[i] = child;
836 }
837
838 assert(!element->extra->length);
839 element->extra->length = self->extra->length;
840 }
841
842 /* add object to memo dictionary (so deepcopy won't visit it again) */
843 id = PyLong_FromSsize_t((uintptr_t) self);
844 if (!id)
845 goto error;
846
847 i = PyDict_SetItem(memo, id, (PyObject*) element);
848
849 Py_DECREF(id);
850
851 if (i < 0)
852 goto error;
853
854 return (PyObject*) element;
855
856 error:
857 Py_DECREF(element);
858 return NULL;
859 }
860
861 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)862 deepcopy(PyObject *object, PyObject *memo)
863 {
864 /* do a deep copy of the given object */
865 elementtreestate *st;
866 PyObject *stack[2];
867
868 /* Fast paths */
869 if (object == Py_None || PyUnicode_CheckExact(object)) {
870 Py_INCREF(object);
871 return object;
872 }
873
874 if (Py_REFCNT(object) == 1) {
875 if (PyDict_CheckExact(object)) {
876 PyObject *key, *value;
877 Py_ssize_t pos = 0;
878 int simple = 1;
879 while (PyDict_Next(object, &pos, &key, &value)) {
880 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881 simple = 0;
882 break;
883 }
884 }
885 if (simple)
886 return PyDict_Copy(object);
887 /* Fall through to general case */
888 }
889 else if (Element_CheckExact(object)) {
890 return _elementtree_Element___deepcopy___impl(
891 (ElementObject *)object, memo);
892 }
893 }
894
895 /* General case */
896 st = ET_STATE_GLOBAL;
897 if (!st->deepcopy_obj) {
898 PyErr_SetString(PyExc_RuntimeError,
899 "deepcopy helper not found");
900 return NULL;
901 }
902
903 stack[0] = object;
904 stack[1] = memo;
905 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
906 }
907
908
909 /*[clinic input]
910 _elementtree.Element.__sizeof__ -> Py_ssize_t
911
912 [clinic start generated code]*/
913
914 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)915 _elementtree_Element___sizeof___impl(ElementObject *self)
916 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
917 {
918 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
919 if (self->extra) {
920 result += sizeof(ElementObjectExtra);
921 if (self->extra->children != self->extra->_children)
922 result += sizeof(PyObject*) * self->extra->allocated;
923 }
924 return result;
925 }
926
/* dict keys for getstate/setstate. __getstate__ builds a dict with these
   keys and __setstate__ reads them back from the state dict. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
933
934 /* __getstate__ returns a fabricated instance dict as in the pure-Python
935 * Element implementation, for interoperability/interchangeability. This
936 * makes the pure-Python implementation details an API, but (a) there aren't
937 * any unnecessary structures there; and (b) it buys compatibility with 3.2
938 * pickles. See issue #16076.
939 */
940 /*[clinic input]
941 _elementtree.Element.__getstate__
942
943 [clinic start generated code]*/
944
945 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)946 _elementtree_Element___getstate___impl(ElementObject *self)
947 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
948 {
949 Py_ssize_t i;
950 PyObject *children, *attrib;
951
952 /* Build a list of children. */
953 children = PyList_New(self->extra ? self->extra->length : 0);
954 if (!children)
955 return NULL;
956 for (i = 0; i < PyList_GET_SIZE(children); i++) {
957 PyObject *child = self->extra->children[i];
958 Py_INCREF(child);
959 PyList_SET_ITEM(children, i, child);
960 }
961
962 if (self->extra && self->extra->attrib) {
963 attrib = self->extra->attrib;
964 Py_INCREF(attrib);
965 }
966 else {
967 attrib = PyDict_New();
968 if (!attrib) {
969 Py_DECREF(children);
970 return NULL;
971 }
972 }
973
974 return Py_BuildValue("{sOsNsNsOsO}",
975 PICKLED_TAG, self->tag,
976 PICKLED_CHILDREN, children,
977 PICKLED_ATTRIB, attrib,
978 PICKLED_TEXT, JOIN_OBJ(self->text),
979 PICKLED_TAIL, JOIN_OBJ(self->tail));
980 }
981
982 static PyObject *
element_setstate_from_attributes(ElementObject * self,PyObject * tag,PyObject * attrib,PyObject * text,PyObject * tail,PyObject * children)983 element_setstate_from_attributes(ElementObject *self,
984 PyObject *tag,
985 PyObject *attrib,
986 PyObject *text,
987 PyObject *tail,
988 PyObject *children)
989 {
990 Py_ssize_t i, nchildren;
991 ElementObjectExtra *oldextra = NULL;
992
993 if (!tag) {
994 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995 return NULL;
996 }
997
998 Py_INCREF(tag);
999 Py_XSETREF(self->tag, tag);
1000
1001 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002 Py_INCREF(JOIN_OBJ(text));
1003 _set_joined_ptr(&self->text, text);
1004
1005 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006 Py_INCREF(JOIN_OBJ(tail));
1007 _set_joined_ptr(&self->tail, tail);
1008
1009 /* Handle ATTRIB and CHILDREN. */
1010 if (!children && !attrib) {
1011 Py_RETURN_NONE;
1012 }
1013
1014 /* Compute 'nchildren'. */
1015 if (children) {
1016 if (!PyList_Check(children)) {
1017 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018 return NULL;
1019 }
1020 nchildren = PyList_GET_SIZE(children);
1021
1022 /* (Re-)allocate 'extra'.
1023 Avoid DECREFs calling into this code again (cycles, etc.)
1024 */
1025 oldextra = self->extra;
1026 self->extra = NULL;
1027 if (element_resize(self, nchildren)) {
1028 assert(!self->extra || !self->extra->length);
1029 clear_extra(self);
1030 self->extra = oldextra;
1031 return NULL;
1032 }
1033 assert(self->extra);
1034 assert(self->extra->allocated >= nchildren);
1035 if (oldextra) {
1036 assert(self->extra->attrib == NULL);
1037 self->extra->attrib = oldextra->attrib;
1038 oldextra->attrib = NULL;
1039 }
1040
1041 /* Copy children */
1042 for (i = 0; i < nchildren; i++) {
1043 PyObject *child = PyList_GET_ITEM(children, i);
1044 if (!Element_Check(child)) {
1045 raise_type_error(child);
1046 self->extra->length = i;
1047 dealloc_extra(oldextra);
1048 return NULL;
1049 }
1050 Py_INCREF(child);
1051 self->extra->children[i] = child;
1052 }
1053
1054 assert(!self->extra->length);
1055 self->extra->length = nchildren;
1056 }
1057 else {
1058 if (element_resize(self, 0)) {
1059 return NULL;
1060 }
1061 }
1062
1063 /* Stash attrib. */
1064 Py_XINCREF(attrib);
1065 Py_XSETREF(self->extra->attrib, attrib);
1066 dealloc_extra(oldextra);
1067
1068 Py_RETURN_NONE;
1069 }
1070
1071 /* __setstate__ for Element instance from the Python implementation.
1072 * 'state' should be the instance dict.
1073 */
1074
1075 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1076 element_setstate_from_Python(ElementObject *self, PyObject *state)
1077 {
1078 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080 PyObject *args;
1081 PyObject *tag, *attrib, *text, *tail, *children;
1082 PyObject *retval;
1083
1084 tag = attrib = text = tail = children = NULL;
1085 args = PyTuple_New(0);
1086 if (!args)
1087 return NULL;
1088
1089 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090 &attrib, &text, &tail, &children))
1091 retval = element_setstate_from_attributes(self, tag, attrib, text,
1092 tail, children);
1093 else
1094 retval = NULL;
1095
1096 Py_DECREF(args);
1097 return retval;
1098 }
1099
1100 /*[clinic input]
1101 _elementtree.Element.__setstate__
1102
1103 state: object
1104 /
1105
1106 [clinic start generated code]*/
1107
1108 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1109 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1111 {
1112 if (!PyDict_CheckExact(state)) {
1113 PyErr_Format(PyExc_TypeError,
1114 "Don't know how to unpickle \"%.200R\" as an Element",
1115 state);
1116 return NULL;
1117 }
1118 else
1119 return element_setstate_from_Python(self, state);
1120 }
1121
1122 LOCAL(int)
checkpath(PyObject * tag)1123 checkpath(PyObject* tag)
1124 {
1125 Py_ssize_t i;
1126 int check = 1;
1127
1128 /* check if a tag contains an xpath character */
1129
1130 #define PATHCHAR(ch) \
1131 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1132
1133 if (PyUnicode_Check(tag)) {
1134 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1135 const void *data = PyUnicode_DATA(tag);
1136 unsigned int kind = PyUnicode_KIND(tag);
1137 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138 PyUnicode_READ(kind, data, 1) == '}' || (
1139 PyUnicode_READ(kind, data, 1) == '*' &&
1140 PyUnicode_READ(kind, data, 2) == '}'))) {
1141 /* wildcard: '{}tag' or '{*}tag' */
1142 return 1;
1143 }
1144 for (i = 0; i < len; i++) {
1145 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146 if (ch == '{')
1147 check = 0;
1148 else if (ch == '}')
1149 check = 1;
1150 else if (check && PATHCHAR(ch))
1151 return 1;
1152 }
1153 return 0;
1154 }
1155 if (PyBytes_Check(tag)) {
1156 const char *p = PyBytes_AS_STRING(tag);
1157 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158 if (len >= 3 && p[0] == '{' && (
1159 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1160 /* wildcard: '{}tag' or '{*}tag' */
1161 return 1;
1162 }
1163 for (i = 0; i < len; i++) {
1164 if (p[i] == '{')
1165 check = 0;
1166 else if (p[i] == '}')
1167 check = 1;
1168 else if (check && PATHCHAR(p[i]))
1169 return 1;
1170 }
1171 return 0;
1172 }
1173
1174 return 1; /* unknown type; might be path expression */
1175 }
1176
1177 /*[clinic input]
1178 _elementtree.Element.extend
1179
1180 elements: object
1181 /
1182
1183 [clinic start generated code]*/
1184
1185 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1186 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1188 {
1189 PyObject* seq;
1190 Py_ssize_t i;
1191
1192 seq = PySequence_Fast(elements, "");
1193 if (!seq) {
1194 PyErr_Format(
1195 PyExc_TypeError,
1196 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1197 );
1198 return NULL;
1199 }
1200
1201 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1202 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1203 Py_INCREF(element);
1204 if (element_add_subelement(self, element) < 0) {
1205 Py_DECREF(seq);
1206 Py_DECREF(element);
1207 return NULL;
1208 }
1209 Py_DECREF(element);
1210 }
1211
1212 Py_DECREF(seq);
1213
1214 Py_RETURN_NONE;
1215 }
1216
1217 /*[clinic input]
1218 _elementtree.Element.find
1219
1220 path: object
1221 namespaces: object = None
1222
1223 [clinic start generated code]*/
1224
1225 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1226 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227 PyObject *namespaces)
1228 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1229 {
1230 Py_ssize_t i;
1231 elementtreestate *st = ET_STATE_GLOBAL;
1232
1233 if (checkpath(path) || namespaces != Py_None) {
1234 _Py_IDENTIFIER(find);
1235 return _PyObject_CallMethodIdObjArgs(
1236 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1237 );
1238 }
1239
1240 if (!self->extra)
1241 Py_RETURN_NONE;
1242
1243 for (i = 0; i < self->extra->length; i++) {
1244 PyObject* item = self->extra->children[i];
1245 int rc;
1246 assert(Element_Check(item));
1247 Py_INCREF(item);
1248 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1249 if (rc > 0)
1250 return item;
1251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
1254 }
1255
1256 Py_RETURN_NONE;
1257 }
1258
1259 /*[clinic input]
1260 _elementtree.Element.findtext
1261
1262 path: object
1263 default: object = None
1264 namespaces: object = None
1265
1266 [clinic start generated code]*/
1267
1268 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1269 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270 PyObject *default_value,
1271 PyObject *namespaces)
1272 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1273 {
1274 Py_ssize_t i;
1275 _Py_IDENTIFIER(findtext);
1276 elementtreestate *st = ET_STATE_GLOBAL;
1277
1278 if (checkpath(path) || namespaces != Py_None)
1279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findtext,
1281 self, path, default_value, namespaces, NULL
1282 );
1283
1284 if (!self->extra) {
1285 Py_INCREF(default_value);
1286 return default_value;
1287 }
1288
1289 for (i = 0; i < self->extra->length; i++) {
1290 PyObject *item = self->extra->children[i];
1291 int rc;
1292 assert(Element_Check(item));
1293 Py_INCREF(item);
1294 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1295 if (rc > 0) {
1296 PyObject* text = element_get_text((ElementObject*)item);
1297 if (text == Py_None) {
1298 Py_DECREF(item);
1299 return PyUnicode_New(0, 0);
1300 }
1301 Py_XINCREF(text);
1302 Py_DECREF(item);
1303 return text;
1304 }
1305 Py_DECREF(item);
1306 if (rc < 0)
1307 return NULL;
1308 }
1309
1310 Py_INCREF(default_value);
1311 return default_value;
1312 }
1313
1314 /*[clinic input]
1315 _elementtree.Element.findall
1316
1317 path: object
1318 namespaces: object = None
1319
1320 [clinic start generated code]*/
1321
1322 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1323 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324 PyObject *namespaces)
1325 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1326 {
1327 Py_ssize_t i;
1328 PyObject* out;
1329 elementtreestate *st = ET_STATE_GLOBAL;
1330
1331 if (checkpath(path) || namespaces != Py_None) {
1332 _Py_IDENTIFIER(findall);
1333 return _PyObject_CallMethodIdObjArgs(
1334 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1335 );
1336 }
1337
1338 out = PyList_New(0);
1339 if (!out)
1340 return NULL;
1341
1342 if (!self->extra)
1343 return out;
1344
1345 for (i = 0; i < self->extra->length; i++) {
1346 PyObject* item = self->extra->children[i];
1347 int rc;
1348 assert(Element_Check(item));
1349 Py_INCREF(item);
1350 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1351 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352 Py_DECREF(item);
1353 Py_DECREF(out);
1354 return NULL;
1355 }
1356 Py_DECREF(item);
1357 }
1358
1359 return out;
1360 }
1361
1362 /*[clinic input]
1363 _elementtree.Element.iterfind
1364
1365 path: object
1366 namespaces: object = None
1367
1368 [clinic start generated code]*/
1369
1370 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1371 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372 PyObject *namespaces)
1373 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374 {
1375 PyObject* tag = path;
1376 _Py_IDENTIFIER(iterfind);
1377 elementtreestate *st = ET_STATE_GLOBAL;
1378
1379 return _PyObject_CallMethodIdObjArgs(
1380 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1381 }
1382
1383 /*[clinic input]
1384 _elementtree.Element.get
1385
1386 key: object
1387 default: object = None
1388
1389 [clinic start generated code]*/
1390
1391 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1392 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393 PyObject *default_value)
1394 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1395 {
1396 PyObject* value;
1397
1398 if (!self->extra || !self->extra->attrib)
1399 value = default_value;
1400 else {
1401 value = PyDict_GetItemWithError(self->extra->attrib, key);
1402 if (!value) {
1403 if (PyErr_Occurred()) {
1404 return NULL;
1405 }
1406 value = default_value;
1407 }
1408 }
1409
1410 Py_INCREF(value);
1411 return value;
1412 }
1413
/* Forward declaration: create_elementiter() is defined further down, after
   the iterator type, but Element.iter() and Element.itertext() below
   already call it. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1416
1417
1418 /*[clinic input]
1419 _elementtree.Element.iter
1420
1421 tag: object = None
1422
1423 [clinic start generated code]*/
1424
1425 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1426 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1427 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1428 {
1429 if (PyUnicode_Check(tag)) {
1430 if (PyUnicode_READY(tag) < 0)
1431 return NULL;
1432 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1433 tag = Py_None;
1434 }
1435 else if (PyBytes_Check(tag)) {
1436 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1437 tag = Py_None;
1438 }
1439
1440 return create_elementiter(self, tag, 0);
1441 }
1442
1443
1444 /*[clinic input]
1445 _elementtree.Element.itertext
1446
1447 [clinic start generated code]*/
1448
1449 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1450 _elementtree_Element_itertext_impl(ElementObject *self)
1451 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1452 {
1453 return create_elementiter(self, Py_None, 1);
1454 }
1455
1456
1457 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1458 element_getitem(PyObject* self_, Py_ssize_t index)
1459 {
1460 ElementObject* self = (ElementObject*) self_;
1461
1462 if (!self->extra || index < 0 || index >= self->extra->length) {
1463 PyErr_SetString(
1464 PyExc_IndexError,
1465 "child index out of range"
1466 );
1467 return NULL;
1468 }
1469
1470 Py_INCREF(self->extra->children[index]);
1471 return self->extra->children[index];
1472 }
1473
1474 /*[clinic input]
1475 _elementtree.Element.insert
1476
1477 index: Py_ssize_t
1478 subelement: object(subclass_of='&Element_Type')
1479 /
1480
1481 [clinic start generated code]*/
1482
1483 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1484 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1485 PyObject *subelement)
1486 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1487 {
1488 Py_ssize_t i;
1489
1490 if (!self->extra) {
1491 if (create_extra(self, NULL) < 0)
1492 return NULL;
1493 }
1494
1495 if (index < 0) {
1496 index += self->extra->length;
1497 if (index < 0)
1498 index = 0;
1499 }
1500 if (index > self->extra->length)
1501 index = self->extra->length;
1502
1503 if (element_resize(self, 1) < 0)
1504 return NULL;
1505
1506 for (i = self->extra->length; i > index; i--)
1507 self->extra->children[i] = self->extra->children[i-1];
1508
1509 Py_INCREF(subelement);
1510 self->extra->children[index] = subelement;
1511
1512 self->extra->length++;
1513
1514 Py_RETURN_NONE;
1515 }
1516
1517 /*[clinic input]
1518 _elementtree.Element.items
1519
1520 [clinic start generated code]*/
1521
1522 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1523 _elementtree_Element_items_impl(ElementObject *self)
1524 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1525 {
1526 if (!self->extra || !self->extra->attrib)
1527 return PyList_New(0);
1528
1529 return PyDict_Items(self->extra->attrib);
1530 }
1531
1532 /*[clinic input]
1533 _elementtree.Element.keys
1534
1535 [clinic start generated code]*/
1536
1537 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1538 _elementtree_Element_keys_impl(ElementObject *self)
1539 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1540 {
1541 if (!self->extra || !self->extra->attrib)
1542 return PyList_New(0);
1543
1544 return PyDict_Keys(self->extra->attrib);
1545 }
1546
1547 static Py_ssize_t
element_length(ElementObject * self)1548 element_length(ElementObject* self)
1549 {
1550 if (!self->extra)
1551 return 0;
1552
1553 return self->extra->length;
1554 }
1555
1556 /*[clinic input]
1557 _elementtree.Element.makeelement
1558
1559 tag: object
1560 attrib: object(subclass_of='&PyDict_Type')
1561 /
1562
1563 [clinic start generated code]*/
1564
1565 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1566 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1567 PyObject *attrib)
1568 /*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1569 {
1570 PyObject* elem;
1571
1572 attrib = PyDict_Copy(attrib);
1573 if (!attrib)
1574 return NULL;
1575
1576 elem = create_new_element(tag, attrib);
1577
1578 Py_DECREF(attrib);
1579
1580 return elem;
1581 }
1582
1583 /*[clinic input]
1584 _elementtree.Element.remove
1585
1586 subelement: object(subclass_of='&Element_Type')
1587 /
1588
1589 [clinic start generated code]*/
1590
1591 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1592 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1593 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1594 {
1595 Py_ssize_t i;
1596 int rc;
1597 PyObject *found;
1598
1599 if (!self->extra) {
1600 /* element has no children, so raise exception */
1601 PyErr_SetString(
1602 PyExc_ValueError,
1603 "list.remove(x): x not in list"
1604 );
1605 return NULL;
1606 }
1607
1608 for (i = 0; i < self->extra->length; i++) {
1609 if (self->extra->children[i] == subelement)
1610 break;
1611 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1612 if (rc > 0)
1613 break;
1614 if (rc < 0)
1615 return NULL;
1616 }
1617
1618 if (i >= self->extra->length) {
1619 /* subelement is not in children, so raise exception */
1620 PyErr_SetString(
1621 PyExc_ValueError,
1622 "list.remove(x): x not in list"
1623 );
1624 return NULL;
1625 }
1626
1627 found = self->extra->children[i];
1628
1629 self->extra->length--;
1630 for (; i < self->extra->length; i++)
1631 self->extra->children[i] = self->extra->children[i+1];
1632
1633 Py_DECREF(found);
1634 Py_RETURN_NONE;
1635 }
1636
1637 static PyObject*
element_repr(ElementObject * self)1638 element_repr(ElementObject* self)
1639 {
1640 int status;
1641
1642 if (self->tag == NULL)
1643 return PyUnicode_FromFormat("<Element at %p>", self);
1644
1645 status = Py_ReprEnter((PyObject *)self);
1646 if (status == 0) {
1647 PyObject *res;
1648 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1649 Py_ReprLeave((PyObject *)self);
1650 return res;
1651 }
1652 if (status > 0)
1653 PyErr_Format(PyExc_RuntimeError,
1654 "reentrant call inside %s.__repr__",
1655 Py_TYPE(self)->tp_name);
1656 return NULL;
1657 }
1658
1659 /*[clinic input]
1660 _elementtree.Element.set
1661
1662 key: object
1663 value: object
1664 /
1665
1666 [clinic start generated code]*/
1667
1668 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1669 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1670 PyObject *value)
1671 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1672 {
1673 PyObject* attrib;
1674
1675 if (!self->extra) {
1676 if (create_extra(self, NULL) < 0)
1677 return NULL;
1678 }
1679
1680 attrib = element_get_attrib(self);
1681 if (!attrib)
1682 return NULL;
1683
1684 if (PyDict_SetItem(attrib, key, value) < 0)
1685 return NULL;
1686
1687 Py_RETURN_NONE;
1688 }
1689
1690 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1691 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1692 {
1693 ElementObject* self = (ElementObject*) self_;
1694 Py_ssize_t i;
1695 PyObject* old;
1696
1697 if (!self->extra || index < 0 || index >= self->extra->length) {
1698 PyErr_SetString(
1699 PyExc_IndexError,
1700 "child assignment index out of range");
1701 return -1;
1702 }
1703
1704 old = self->extra->children[index];
1705
1706 if (item) {
1707 if (!Element_Check(item)) {
1708 raise_type_error(item);
1709 return -1;
1710 }
1711 Py_INCREF(item);
1712 self->extra->children[index] = item;
1713 } else {
1714 self->extra->length--;
1715 for (i = index; i < self->extra->length; i++)
1716 self->extra->children[i] = self->extra->children[i+1];
1717 }
1718
1719 Py_DECREF(old);
1720
1721 return 0;
1722 }
1723
1724 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1725 element_subscr(PyObject* self_, PyObject* item)
1726 {
1727 ElementObject* self = (ElementObject*) self_;
1728
1729 if (PyIndex_Check(item)) {
1730 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1731
1732 if (i == -1 && PyErr_Occurred()) {
1733 return NULL;
1734 }
1735 if (i < 0 && self->extra)
1736 i += self->extra->length;
1737 return element_getitem(self_, i);
1738 }
1739 else if (PySlice_Check(item)) {
1740 Py_ssize_t start, stop, step, slicelen, i;
1741 size_t cur;
1742 PyObject* list;
1743
1744 if (!self->extra)
1745 return PyList_New(0);
1746
1747 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1748 return NULL;
1749 }
1750 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1751 step);
1752
1753 if (slicelen <= 0)
1754 return PyList_New(0);
1755 else {
1756 list = PyList_New(slicelen);
1757 if (!list)
1758 return NULL;
1759
1760 for (cur = start, i = 0; i < slicelen;
1761 cur += step, i++) {
1762 PyObject* item = self->extra->children[cur];
1763 Py_INCREF(item);
1764 PyList_SET_ITEM(list, i, item);
1765 }
1766
1767 return list;
1768 }
1769 }
1770 else {
1771 PyErr_SetString(PyExc_TypeError,
1772 "element indices must be integers");
1773 return NULL;
1774 }
1775 }
1776
1777 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1778 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1779 {
1780 ElementObject* self = (ElementObject*) self_;
1781
1782 if (PyIndex_Check(item)) {
1783 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1784
1785 if (i == -1 && PyErr_Occurred()) {
1786 return -1;
1787 }
1788 if (i < 0 && self->extra)
1789 i += self->extra->length;
1790 return element_setitem(self_, i, value);
1791 }
1792 else if (PySlice_Check(item)) {
1793 Py_ssize_t start, stop, step, slicelen, newlen, i;
1794 size_t cur;
1795
1796 PyObject* recycle = NULL;
1797 PyObject* seq;
1798
1799 if (!self->extra) {
1800 if (create_extra(self, NULL) < 0)
1801 return -1;
1802 }
1803
1804 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1805 return -1;
1806 }
1807 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1808 step);
1809
1810 if (value == NULL) {
1811 /* Delete slice */
1812 size_t cur;
1813 Py_ssize_t i;
1814
1815 if (slicelen <= 0)
1816 return 0;
1817
1818 /* Since we're deleting, the direction of the range doesn't matter,
1819 * so for simplicity make it always ascending.
1820 */
1821 if (step < 0) {
1822 stop = start + 1;
1823 start = stop + step * (slicelen - 1) - 1;
1824 step = -step;
1825 }
1826
1827 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1828
1829 /* recycle is a list that will contain all the children
1830 * scheduled for removal.
1831 */
1832 if (!(recycle = PyList_New(slicelen))) {
1833 return -1;
1834 }
1835
1836 /* This loop walks over all the children that have to be deleted,
1837 * with cur pointing at them. num_moved is the amount of children
1838 * until the next deleted child that have to be "shifted down" to
1839 * occupy the deleted's places.
1840 * Note that in the ith iteration, shifting is done i+i places down
1841 * because i children were already removed.
1842 */
1843 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1844 /* Compute how many children have to be moved, clipping at the
1845 * list end.
1846 */
1847 Py_ssize_t num_moved = step - 1;
1848 if (cur + step >= (size_t)self->extra->length) {
1849 num_moved = self->extra->length - cur - 1;
1850 }
1851
1852 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1853
1854 memmove(
1855 self->extra->children + cur - i,
1856 self->extra->children + cur + 1,
1857 num_moved * sizeof(PyObject *));
1858 }
1859
1860 /* Leftover "tail" after the last removed child */
1861 cur = start + (size_t)slicelen * step;
1862 if (cur < (size_t)self->extra->length) {
1863 memmove(
1864 self->extra->children + cur - slicelen,
1865 self->extra->children + cur,
1866 (self->extra->length - cur) * sizeof(PyObject *));
1867 }
1868
1869 self->extra->length -= slicelen;
1870
1871 /* Discard the recycle list with all the deleted sub-elements */
1872 Py_DECREF(recycle);
1873 return 0;
1874 }
1875
1876 /* A new slice is actually being assigned */
1877 seq = PySequence_Fast(value, "");
1878 if (!seq) {
1879 PyErr_Format(
1880 PyExc_TypeError,
1881 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1882 );
1883 return -1;
1884 }
1885 newlen = PySequence_Fast_GET_SIZE(seq);
1886
1887 if (step != 1 && newlen != slicelen)
1888 {
1889 Py_DECREF(seq);
1890 PyErr_Format(PyExc_ValueError,
1891 "attempt to assign sequence of size %zd "
1892 "to extended slice of size %zd",
1893 newlen, slicelen
1894 );
1895 return -1;
1896 }
1897
1898 /* Resize before creating the recycle bin, to prevent refleaks. */
1899 if (newlen > slicelen) {
1900 if (element_resize(self, newlen - slicelen) < 0) {
1901 Py_DECREF(seq);
1902 return -1;
1903 }
1904 }
1905
1906 for (i = 0; i < newlen; i++) {
1907 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1908 if (!Element_Check(element)) {
1909 raise_type_error(element);
1910 Py_DECREF(seq);
1911 return -1;
1912 }
1913 }
1914
1915 if (slicelen > 0) {
1916 /* to avoid recursive calls to this method (via decref), move
1917 old items to the recycle bin here, and get rid of them when
1918 we're done modifying the element */
1919 recycle = PyList_New(slicelen);
1920 if (!recycle) {
1921 Py_DECREF(seq);
1922 return -1;
1923 }
1924 for (cur = start, i = 0; i < slicelen;
1925 cur += step, i++)
1926 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1927 }
1928
1929 if (newlen < slicelen) {
1930 /* delete slice */
1931 for (i = stop; i < self->extra->length; i++)
1932 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1933 } else if (newlen > slicelen) {
1934 /* insert slice */
1935 for (i = self->extra->length-1; i >= stop; i--)
1936 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1937 }
1938
1939 /* replace the slice */
1940 for (cur = start, i = 0; i < newlen;
1941 cur += step, i++) {
1942 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1943 Py_INCREF(element);
1944 self->extra->children[cur] = element;
1945 }
1946
1947 self->extra->length += newlen - slicelen;
1948
1949 Py_DECREF(seq);
1950
1951 /* discard the recycle bin, and everything in it */
1952 Py_XDECREF(recycle);
1953
1954 return 0;
1955 }
1956 else {
1957 PyErr_SetString(PyExc_TypeError,
1958 "element indices must be integers");
1959 return -1;
1960 }
1961 }
1962
1963 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1964 element_tag_getter(ElementObject *self, void *closure)
1965 {
1966 PyObject *res = self->tag;
1967 Py_INCREF(res);
1968 return res;
1969 }
1970
1971 static PyObject*
element_text_getter(ElementObject * self,void * closure)1972 element_text_getter(ElementObject *self, void *closure)
1973 {
1974 PyObject *res = element_get_text(self);
1975 Py_XINCREF(res);
1976 return res;
1977 }
1978
1979 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1980 element_tail_getter(ElementObject *self, void *closure)
1981 {
1982 PyObject *res = element_get_tail(self);
1983 Py_XINCREF(res);
1984 return res;
1985 }
1986
1987 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)1988 element_attrib_getter(ElementObject *self, void *closure)
1989 {
1990 PyObject *res;
1991 if (!self->extra) {
1992 if (create_extra(self, NULL) < 0)
1993 return NULL;
1994 }
1995 res = element_get_attrib(self);
1996 Py_XINCREF(res);
1997 return res;
1998 }
1999
/* Macro for setter validation.
 *
 * A setter is called with a NULL value when the attribute is being
 * deleted; this guard rejects that with AttributeError and makes the
 * enclosing function return -1.  It must therefore only be used inside
 * functions returning int.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays safe next to if/else; invoke it with a
 * trailing semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2008
2009 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2010 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2011 {
2012 _VALIDATE_ATTR_VALUE(value);
2013 Py_INCREF(value);
2014 Py_SETREF(self->tag, value);
2015 return 0;
2016 }
2017
2018 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2019 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2020 {
2021 _VALIDATE_ATTR_VALUE(value);
2022 Py_INCREF(value);
2023 _set_joined_ptr(&self->text, value);
2024 return 0;
2025 }
2026
2027 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2028 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2029 {
2030 _VALIDATE_ATTR_VALUE(value);
2031 Py_INCREF(value);
2032 _set_joined_ptr(&self->tail, value);
2033 return 0;
2034 }
2035
2036 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2037 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2038 {
2039 _VALIDATE_ATTR_VALUE(value);
2040 if (!PyDict_Check(value)) {
2041 PyErr_Format(PyExc_TypeError,
2042 "attrib must be dict, not %.200s",
2043 value->ob_type->tp_name);
2044 return -1;
2045 }
2046 if (!self->extra) {
2047 if (create_extra(self, NULL) < 0)
2048 return -1;
2049 }
2050 Py_INCREF(value);
2051 Py_XSETREF(self->extra->attrib, value);
2052 return 0;
2053 }
2054
2055 static PySequenceMethods element_as_sequence = {
2056 (lenfunc) element_length,
2057 0, /* sq_concat */
2058 0, /* sq_repeat */
2059 element_getitem,
2060 0,
2061 element_setitem,
2062 0,
2063 };
2064
2065 /******************************* Element iterator ****************************/
2066
2067 /* ElementIterObject represents the iteration state over an XML element in
2068 * pre-order traversal. To keep track of which sub-element should be returned
2069 * next, a stack of parents is maintained. This is a standard stack-based
2070 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2072 * Each stack item contains the saved parent to which we should return after
2073 * the current one is exhausted, and the next child to examine in that parent.
2074 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack holds a reference to it */
    Py_ssize_t child_index;     /* index of the next child of `parent` to
                                   examine */
} ParentLocator;
2079
/* Iterator object created by create_elementiter(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* stack storage, allocated with PyMem */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element iteration starts from; set to
                                       NULL once it has been consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2089
2090
2091 static void
elementiter_dealloc(ElementIterObject * it)2092 elementiter_dealloc(ElementIterObject *it)
2093 {
2094 Py_ssize_t i = it->parent_stack_used;
2095 it->parent_stack_used = 0;
2096 /* bpo-31095: UnTrack is needed before calling any callbacks */
2097 PyObject_GC_UnTrack(it);
2098 while (i--)
2099 Py_XDECREF(it->parent_stack[i].parent);
2100 PyMem_Free(it->parent_stack);
2101
2102 Py_XDECREF(it->sought_tag);
2103 Py_XDECREF(it->root_element);
2104
2105 PyObject_GC_Del(it);
2106 }
2107
2108 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2109 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2110 {
2111 Py_ssize_t i = it->parent_stack_used;
2112 while (i--)
2113 Py_VISIT(it->parent_stack[i].parent);
2114
2115 Py_VISIT(it->root_element);
2116 Py_VISIT(it->sought_tag);
2117 return 0;
2118 }
2119
2120 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2121 */
2122 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2123 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2124 {
2125 ParentLocator *item;
2126
2127 if (it->parent_stack_used >= it->parent_stack_size) {
2128 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2129 ParentLocator *parent_stack = it->parent_stack;
2130 PyMem_Resize(parent_stack, ParentLocator, new_size);
2131 if (parent_stack == NULL)
2132 return -1;
2133 it->parent_stack = parent_stack;
2134 it->parent_stack_size = new_size;
2135 }
2136 item = it->parent_stack + it->parent_stack_used++;
2137 Py_INCREF(parent);
2138 item->parent = parent;
2139 item->child_index = 0;
2140 return 0;
2141 }
2142
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Returns a new reference to the next element (or text object), or NULL
     * with StopIteration set when the iteration is exhausted, or NULL with
     * another exception set on error.
     *
     * Reference bookkeeping: at the `gettext` label and in the tag
     * comparison below, `elem` always carries one reference owned by this
     * function, which is either returned to the caller or dropped.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference the stack held on elem is now ours. */
                it->parent_stack_used--;
                /* Note the extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* our own reference to the child; the stack takes a second one
               in parent_stack_push_new() below */
            Py_INCREF(elem);
        }

        /* Descend: elem becomes the parent whose children are visited next. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter(): a sought tag of None matches every element */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* no match (rc == 0) or comparison error (rc < 0) */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* itertext(): `text` is the text or tail fetched above; it is not
           an owned reference yet. */
        if (!text) {
            /* NOTE(review): presumably element_get_text()/element_get_tail()
               set an exception when returning NULL -- confirm */
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            /* nothing to yield for this node; keep iterating */
            Py_DECREF(elem);
        }
        else {
            Py_INCREF(text);
            Py_DECREF(elem);
            /* only truthy text is yielded; falsy text is skipped */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* not reached: the loop above has no break */
    return NULL;
}
2248
2249
2250 static PyTypeObject ElementIter_Type = {
2251 PyVarObject_HEAD_INIT(NULL, 0)
2252 /* Using the module's name since the pure-Python implementation does not
2253 have such a type. */
2254 "_elementtree._element_iterator", /* tp_name */
2255 sizeof(ElementIterObject), /* tp_basicsize */
2256 0, /* tp_itemsize */
2257 /* methods */
2258 (destructor)elementiter_dealloc, /* tp_dealloc */
2259 0, /* tp_vectorcall_offset */
2260 0, /* tp_getattr */
2261 0, /* tp_setattr */
2262 0, /* tp_as_async */
2263 0, /* tp_repr */
2264 0, /* tp_as_number */
2265 0, /* tp_as_sequence */
2266 0, /* tp_as_mapping */
2267 0, /* tp_hash */
2268 0, /* tp_call */
2269 0, /* tp_str */
2270 0, /* tp_getattro */
2271 0, /* tp_setattro */
2272 0, /* tp_as_buffer */
2273 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2274 0, /* tp_doc */
2275 (traverseproc)elementiter_traverse, /* tp_traverse */
2276 0, /* tp_clear */
2277 0, /* tp_richcompare */
2278 0, /* tp_weaklistoffset */
2279 PyObject_SelfIter, /* tp_iter */
2280 (iternextfunc)elementiter_next, /* tp_iternext */
2281 0, /* tp_methods */
2282 0, /* tp_members */
2283 0, /* tp_getset */
2284 0, /* tp_base */
2285 0, /* tp_dict */
2286 0, /* tp_descr_get */
2287 0, /* tp_descr_set */
2288 0, /* tp_dictoffset */
2289 0, /* tp_init */
2290 0, /* tp_alloc */
2291 0, /* tp_new */
2292 };
2293
2294 #define INIT_PARENT_STACK_SIZE 8
2295
2296 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2297 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2298 {
2299 ElementIterObject *it;
2300
2301 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2302 if (!it)
2303 return NULL;
2304
2305 Py_INCREF(tag);
2306 it->sought_tag = tag;
2307 it->gettext = gettext;
2308 Py_INCREF(self);
2309 it->root_element = self;
2310
2311 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2312 if (it->parent_stack == NULL) {
2313 Py_DECREF(it);
2314 PyErr_NoMemory();
2315 return NULL;
2316 }
2317 it->parent_stack_used = 0;
2318 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2319
2320 PyObject_GC_Track(it);
2321
2322 return (PyObject *)it;
2323 }
2324
2325
2326 /* ==================================================================== */
2327 /* the tree builder type */
2328
/* Instance layout of the TreeBuilder type.  All PyObject* members are owned
   references (see treebuilder_gc_traverse / treebuilder_gc_clear). */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node), or NULL */

    PyObject *this; /* current node (Py_None at top level) */
    PyObject *last; /* most recently started or closed node
                       (Py_None before the first start) */
    PyObject *last_for_tail; /* node whose tail receives pending data;
                                NULL means data goes to last's text */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack (list of enclosing 'this' nodes) */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable creating elements, or NULL to use
                                  create_new_element() */
    PyObject *comment_factory; /* callable creating comment nodes, or NULL
                                  (the comment text is passed through) */
    PyObject *pi_factory;      /* callable creating PI nodes, or NULL
                                  ((target, text) tuples are produced) */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    char insert_comments; /* nonzero: append created comments to 'this' */
    char insert_pis;      /* nonzero: append created PIs to 'this' */
} TreeBuilderObject;

/* True only for exact TreeBuilder instances (not subclasses). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2361
2362 /* -------------------------------------------------------------------- */
2363 /* constructor and destructor */
2364
2365 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2366 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2367 {
2368 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2369 if (t != NULL) {
2370 t->root = NULL;
2371
2372 Py_INCREF(Py_None);
2373 t->this = Py_None;
2374 Py_INCREF(Py_None);
2375 t->last = Py_None;
2376
2377 t->data = NULL;
2378 t->element_factory = NULL;
2379 t->comment_factory = NULL;
2380 t->pi_factory = NULL;
2381 t->stack = PyList_New(20);
2382 if (!t->stack) {
2383 Py_DECREF(t->this);
2384 Py_DECREF(t->last);
2385 Py_DECREF((PyObject *) t);
2386 return NULL;
2387 }
2388 t->index = 0;
2389
2390 t->events_append = NULL;
2391 t->start_event_obj = t->end_event_obj = NULL;
2392 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2393 t->comment_event_obj = t->pi_event_obj = NULL;
2394 t->insert_comments = t->insert_pis = 0;
2395 }
2396 return (PyObject *)t;
2397 }
2398
2399 /*[clinic input]
2400 _elementtree.TreeBuilder.__init__
2401
2402 element_factory: object = None
2403 *
2404 comment_factory: object = None
2405 pi_factory: object = None
2406 insert_comments: bool = False
2407 insert_pis: bool = False
2408
2409 [clinic start generated code]*/
2410
2411 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2412 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2413 PyObject *element_factory,
2414 PyObject *comment_factory,
2415 PyObject *pi_factory,
2416 int insert_comments, int insert_pis)
2417 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2418 {
2419 if (element_factory != Py_None) {
2420 Py_INCREF(element_factory);
2421 Py_XSETREF(self->element_factory, element_factory);
2422 } else {
2423 Py_CLEAR(self->element_factory);
2424 }
2425
2426 if (comment_factory == Py_None) {
2427 elementtreestate *st = ET_STATE_GLOBAL;
2428 comment_factory = st->comment_factory;
2429 }
2430 if (comment_factory) {
2431 Py_INCREF(comment_factory);
2432 Py_XSETREF(self->comment_factory, comment_factory);
2433 self->insert_comments = insert_comments;
2434 } else {
2435 Py_CLEAR(self->comment_factory);
2436 self->insert_comments = 0;
2437 }
2438
2439 if (pi_factory == Py_None) {
2440 elementtreestate *st = ET_STATE_GLOBAL;
2441 pi_factory = st->pi_factory;
2442 }
2443 if (pi_factory) {
2444 Py_INCREF(pi_factory);
2445 Py_XSETREF(self->pi_factory, pi_factory);
2446 self->insert_pis = insert_pis;
2447 } else {
2448 Py_CLEAR(self->pi_factory);
2449 self->insert_pis = 0;
2450 }
2451
2452 return 0;
2453 }
2454
2455 static int
treebuilder_gc_traverse(TreeBuilderObject * self,visitproc visit,void * arg)2456 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2457 {
2458 Py_VISIT(self->pi_event_obj);
2459 Py_VISIT(self->comment_event_obj);
2460 Py_VISIT(self->end_ns_event_obj);
2461 Py_VISIT(self->start_ns_event_obj);
2462 Py_VISIT(self->end_event_obj);
2463 Py_VISIT(self->start_event_obj);
2464 Py_VISIT(self->events_append);
2465 Py_VISIT(self->root);
2466 Py_VISIT(self->this);
2467 Py_VISIT(self->last);
2468 Py_VISIT(self->last_for_tail);
2469 Py_VISIT(self->data);
2470 Py_VISIT(self->stack);
2471 Py_VISIT(self->pi_factory);
2472 Py_VISIT(self->comment_factory);
2473 Py_VISIT(self->element_factory);
2474 return 0;
2475 }
2476
2477 static int
treebuilder_gc_clear(TreeBuilderObject * self)2478 treebuilder_gc_clear(TreeBuilderObject *self)
2479 {
2480 Py_CLEAR(self->pi_event_obj);
2481 Py_CLEAR(self->comment_event_obj);
2482 Py_CLEAR(self->end_ns_event_obj);
2483 Py_CLEAR(self->start_ns_event_obj);
2484 Py_CLEAR(self->end_event_obj);
2485 Py_CLEAR(self->start_event_obj);
2486 Py_CLEAR(self->events_append);
2487 Py_CLEAR(self->stack);
2488 Py_CLEAR(self->data);
2489 Py_CLEAR(self->last);
2490 Py_CLEAR(self->last_for_tail);
2491 Py_CLEAR(self->this);
2492 Py_CLEAR(self->pi_factory);
2493 Py_CLEAR(self->comment_factory);
2494 Py_CLEAR(self->element_factory);
2495 Py_CLEAR(self->root);
2496 return 0;
2497 }
2498
2499 static void
treebuilder_dealloc(TreeBuilderObject * self)2500 treebuilder_dealloc(TreeBuilderObject *self)
2501 {
2502 PyObject_GC_UnTrack(self);
2503 treebuilder_gc_clear(self);
2504 Py_TYPE(self)->tp_free((PyObject *)self);
2505 }
2506
2507 /* -------------------------------------------------------------------- */
2508 /* helpers for handling of arbitrary element-like objects */
2509
2510 /*[clinic input]
2511 _elementtree._set_factories
2512
2513 comment_factory: object
2514 pi_factory: object
2515 /
2516
2517 Change the factories used to create comments and processing instructions.
2518
2519 For internal use only.
2520 [clinic start generated code]*/
2521
2522 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2523 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2524 PyObject *pi_factory)
2525 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2526 {
2527 elementtreestate *st = ET_STATE_GLOBAL;
2528 PyObject *old;
2529
2530 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2531 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2532 Py_TYPE(comment_factory)->tp_name);
2533 return NULL;
2534 }
2535 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2536 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2537 Py_TYPE(pi_factory)->tp_name);
2538 return NULL;
2539 }
2540
2541 old = PyTuple_Pack(2,
2542 st->comment_factory ? st->comment_factory : Py_None,
2543 st->pi_factory ? st->pi_factory : Py_None);
2544
2545 if (comment_factory == Py_None) {
2546 Py_CLEAR(st->comment_factory);
2547 } else {
2548 Py_INCREF(comment_factory);
2549 Py_XSETREF(st->comment_factory, comment_factory);
2550 }
2551 if (pi_factory == Py_None) {
2552 Py_CLEAR(st->pi_factory);
2553 } else {
2554 Py_INCREF(pi_factory);
2555 Py_XSETREF(st->pi_factory, pi_factory);
2556 }
2557
2558 return old;
2559 }
2560
2561 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2562 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2563 PyObject **dest, _Py_Identifier *name)
2564 {
2565 /* Fast paths for the "almost always" cases. */
2566 if (Element_CheckExact(element)) {
2567 PyObject *dest_obj = JOIN_OBJ(*dest);
2568 if (dest_obj == Py_None) {
2569 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2570 *data = NULL;
2571 Py_DECREF(dest_obj);
2572 return 0;
2573 }
2574 else if (JOIN_GET(*dest)) {
2575 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2576 return -1;
2577 }
2578 Py_CLEAR(*data);
2579 return 0;
2580 }
2581 }
2582
2583 /* Fallback for the non-Element / non-trivial cases. */
2584 {
2585 int r;
2586 PyObject* joined;
2587 PyObject* previous = _PyObject_GetAttrId(element, name);
2588 if (!previous)
2589 return -1;
2590 joined = list_join(*data);
2591 if (!joined) {
2592 Py_DECREF(previous);
2593 return -1;
2594 }
2595 if (previous != Py_None) {
2596 PyObject *tmp = PyNumber_Add(previous, joined);
2597 Py_DECREF(joined);
2598 Py_DECREF(previous);
2599 if (!tmp)
2600 return -1;
2601 joined = tmp;
2602 } else {
2603 Py_DECREF(previous);
2604 }
2605
2606 r = _PyObject_SetAttrId(element, name, joined);
2607 Py_DECREF(joined);
2608 if (r < 0)
2609 return -1;
2610 Py_CLEAR(*data);
2611 return 0;
2612 }
2613 }
2614
2615 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2616 treebuilder_flush_data(TreeBuilderObject* self)
2617 {
2618 if (!self->data) {
2619 return 0;
2620 }
2621
2622 if (!self->last_for_tail) {
2623 PyObject *element = self->last;
2624 _Py_IDENTIFIER(text);
2625 return treebuilder_extend_element_text_or_tail(
2626 element, &self->data,
2627 &((ElementObject *) element)->text, &PyId_text);
2628 }
2629 else {
2630 PyObject *element = self->last_for_tail;
2631 _Py_IDENTIFIER(tail);
2632 return treebuilder_extend_element_text_or_tail(
2633 element, &self->data,
2634 &((ElementObject *) element)->tail, &PyId_tail);
2635 }
2636 }
2637
2638 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2639 treebuilder_add_subelement(PyObject *element, PyObject *child)
2640 {
2641 _Py_IDENTIFIER(append);
2642 if (Element_CheckExact(element)) {
2643 ElementObject *elem = (ElementObject *) element;
2644 return element_add_subelement(elem, child);
2645 }
2646 else {
2647 PyObject *res;
2648 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2649 if (res == NULL)
2650 return -1;
2651 Py_DECREF(res);
2652 return 0;
2653 }
2654 }
2655
2656 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2657 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2658 PyObject *node)
2659 {
2660 if (action != NULL) {
2661 PyObject *res;
2662 PyObject *event = PyTuple_Pack(2, action, node);
2663 if (event == NULL)
2664 return -1;
2665 res = PyObject_CallOneArg(self->events_append, event);
2666 Py_DECREF(event);
2667 if (res == NULL)
2668 return -1;
2669 Py_DECREF(res);
2670 }
2671 return 0;
2672 }
2673
2674 /* -------------------------------------------------------------------- */
2675 /* handlers */
2676
2677 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2678 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2679 PyObject* attrib)
2680 {
2681 PyObject* node;
2682 PyObject* this;
2683 elementtreestate *st = ET_STATE_GLOBAL;
2684
2685 if (treebuilder_flush_data(self) < 0) {
2686 return NULL;
2687 }
2688
2689 if (!self->element_factory) {
2690 node = create_new_element(tag, attrib);
2691 } else if (attrib == NULL) {
2692 attrib = PyDict_New();
2693 if (!attrib)
2694 return NULL;
2695 node = PyObject_CallFunctionObjArgs(self->element_factory,
2696 tag, attrib, NULL);
2697 Py_DECREF(attrib);
2698 }
2699 else {
2700 node = PyObject_CallFunctionObjArgs(self->element_factory,
2701 tag, attrib, NULL);
2702 }
2703 if (!node) {
2704 return NULL;
2705 }
2706
2707 this = self->this;
2708 Py_CLEAR(self->last_for_tail);
2709
2710 if (this != Py_None) {
2711 if (treebuilder_add_subelement(this, node) < 0)
2712 goto error;
2713 } else {
2714 if (self->root) {
2715 PyErr_SetString(
2716 st->parseerror_obj,
2717 "multiple elements on top level"
2718 );
2719 goto error;
2720 }
2721 Py_INCREF(node);
2722 self->root = node;
2723 }
2724
2725 if (self->index < PyList_GET_SIZE(self->stack)) {
2726 if (PyList_SetItem(self->stack, self->index, this) < 0)
2727 goto error;
2728 Py_INCREF(this);
2729 } else {
2730 if (PyList_Append(self->stack, this) < 0)
2731 goto error;
2732 }
2733 self->index++;
2734
2735 Py_INCREF(node);
2736 Py_SETREF(self->this, node);
2737 Py_INCREF(node);
2738 Py_SETREF(self->last, node);
2739
2740 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2741 goto error;
2742
2743 return node;
2744
2745 error:
2746 Py_DECREF(node);
2747 return NULL;
2748 }
2749
2750 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2751 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2752 {
2753 if (!self->data) {
2754 if (self->last == Py_None) {
2755 /* ignore calls to data before the first call to start */
2756 Py_RETURN_NONE;
2757 }
2758 /* store the first item as is */
2759 Py_INCREF(data); self->data = data;
2760 } else {
2761 /* more than one item; use a list to collect items */
2762 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2763 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2764 /* XXX this code path unused in Python 3? */
2765 /* expat often generates single character data sections; handle
2766 the most common case by resizing the existing string... */
2767 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2768 if (_PyBytes_Resize(&self->data, size + 1) < 0)
2769 return NULL;
2770 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2771 } else if (PyList_CheckExact(self->data)) {
2772 if (PyList_Append(self->data, data) < 0)
2773 return NULL;
2774 } else {
2775 PyObject* list = PyList_New(2);
2776 if (!list)
2777 return NULL;
2778 PyList_SET_ITEM(list, 0, self->data);
2779 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2780 self->data = list;
2781 }
2782 }
2783
2784 Py_RETURN_NONE;
2785 }
2786
2787 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2788 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2789 {
2790 PyObject* item;
2791
2792 if (treebuilder_flush_data(self) < 0) {
2793 return NULL;
2794 }
2795
2796 if (self->index == 0) {
2797 PyErr_SetString(
2798 PyExc_IndexError,
2799 "pop from empty stack"
2800 );
2801 return NULL;
2802 }
2803
2804 item = self->last;
2805 self->last = self->this;
2806 Py_INCREF(self->last);
2807 Py_XSETREF(self->last_for_tail, self->last);
2808 self->index--;
2809 self->this = PyList_GET_ITEM(self->stack, self->index);
2810 Py_INCREF(self->this);
2811 Py_DECREF(item);
2812
2813 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2814 return NULL;
2815
2816 Py_INCREF(self->last);
2817 return (PyObject*) self->last;
2818 }
2819
2820 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2821 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2822 {
2823 PyObject* comment;
2824 PyObject* this;
2825
2826 if (treebuilder_flush_data(self) < 0) {
2827 return NULL;
2828 }
2829
2830 if (self->comment_factory) {
2831 comment = PyObject_CallOneArg(self->comment_factory, text);
2832 if (!comment)
2833 return NULL;
2834
2835 this = self->this;
2836 if (self->insert_comments && this != Py_None) {
2837 if (treebuilder_add_subelement(this, comment) < 0)
2838 goto error;
2839 Py_INCREF(comment);
2840 Py_XSETREF(self->last_for_tail, comment);
2841 }
2842 } else {
2843 Py_INCREF(text);
2844 comment = text;
2845 }
2846
2847 if (self->events_append && self->comment_event_obj) {
2848 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2849 goto error;
2850 }
2851
2852 return comment;
2853
2854 error:
2855 Py_DECREF(comment);
2856 return NULL;
2857 }
2858
2859 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2860 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2861 {
2862 PyObject* pi;
2863 PyObject* this;
2864 PyObject* stack[2] = {target, text};
2865
2866 if (treebuilder_flush_data(self) < 0) {
2867 return NULL;
2868 }
2869
2870 if (self->pi_factory) {
2871 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2872 if (!pi) {
2873 return NULL;
2874 }
2875
2876 this = self->this;
2877 if (self->insert_pis && this != Py_None) {
2878 if (treebuilder_add_subelement(this, pi) < 0)
2879 goto error;
2880 Py_INCREF(pi);
2881 Py_XSETREF(self->last_for_tail, pi);
2882 }
2883 } else {
2884 pi = PyTuple_Pack(2, target, text);
2885 if (!pi) {
2886 return NULL;
2887 }
2888 }
2889
2890 if (self->events_append && self->pi_event_obj) {
2891 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2892 goto error;
2893 }
2894
2895 return pi;
2896
2897 error:
2898 Py_DECREF(pi);
2899 return NULL;
2900 }
2901
2902 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2903 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2904 {
2905 PyObject* parcel;
2906
2907 if (self->events_append && self->start_ns_event_obj) {
2908 parcel = PyTuple_Pack(2, prefix, uri);
2909 if (!parcel) {
2910 return NULL;
2911 }
2912
2913 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2914 Py_DECREF(parcel);
2915 return NULL;
2916 }
2917 Py_DECREF(parcel);
2918 }
2919
2920 Py_RETURN_NONE;
2921 }
2922
2923 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2924 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2925 {
2926 if (self->events_append && self->end_ns_event_obj) {
2927 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2928 return NULL;
2929 }
2930 }
2931
2932 Py_RETURN_NONE;
2933 }
2934
2935 /* -------------------------------------------------------------------- */
2936 /* methods (in alphabetical order) */
2937
2938 /*[clinic input]
2939 _elementtree.TreeBuilder.data
2940
2941 data: object
2942 /
2943
2944 [clinic start generated code]*/
2945
2946 static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject * self,PyObject * data)2947 _elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2948 /*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2949 {
2950 return treebuilder_handle_data(self, data);
2951 }
2952
2953 /*[clinic input]
2954 _elementtree.TreeBuilder.end
2955
2956 tag: object
2957 /
2958
2959 [clinic start generated code]*/
2960
2961 static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject * self,PyObject * tag)2962 _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2963 /*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2964 {
2965 return treebuilder_handle_end(self, tag);
2966 }
2967
2968 /*[clinic input]
2969 _elementtree.TreeBuilder.comment
2970
2971 text: object
2972 /
2973
2974 [clinic start generated code]*/
2975
2976 static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject * self,PyObject * text)2977 _elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2978 /*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2979 {
2980 return treebuilder_handle_comment(self, text);
2981 }
2982
2983 /*[clinic input]
2984 _elementtree.TreeBuilder.pi
2985
2986 target: object
2987 text: object = None
2988 /
2989
2990 [clinic start generated code]*/
2991
2992 static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject * self,PyObject * target,PyObject * text)2993 _elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2994 PyObject *text)
2995 /*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2996 {
2997 return treebuilder_handle_pi(self, target, text);
2998 }
2999
3000 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)3001 treebuilder_done(TreeBuilderObject* self)
3002 {
3003 PyObject* res;
3004
3005 /* FIXME: check stack size? */
3006
3007 if (self->root)
3008 res = self->root;
3009 else
3010 res = Py_None;
3011
3012 Py_INCREF(res);
3013 return res;
3014 }
3015
3016 /*[clinic input]
3017 _elementtree.TreeBuilder.close
3018
3019 [clinic start generated code]*/
3020
3021 static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject * self)3022 _elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3023 /*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3024 {
3025 return treebuilder_done(self);
3026 }
3027
3028 /*[clinic input]
3029 _elementtree.TreeBuilder.start
3030
3031 tag: object
3032 attrs: object(subclass_of='&PyDict_Type')
3033 /
3034
3035 [clinic start generated code]*/
3036
3037 static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject * self,PyObject * tag,PyObject * attrs)3038 _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3039 PyObject *attrs)
3040 /*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
3041 {
3042 return treebuilder_handle_start(self, tag, attrs);
3043 }
3044
3045 /* ==================================================================== */
3046 /* the expat interface */
3047
3048 #include "expat.h"
3049 #include "pyexpat.h"
3050
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Select an entry of the cached dispatch table; used as EXPAT(func)(args). */
#define EXPAT(func) (expat_capi->func)

/* Memory handling suite made of Python's object allocator functions.
   NOTE(review): presumably handed to expat when a parser is created; that
   call is outside this section -- confirm. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3059
/* Instance layout of the XMLParser type. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser; /* the underlying expat parser */

    PyObject *target; /* object receiving the parse events; an exact
                         TreeBuilder is driven through C-level shortcuts */
    PyObject *entity; /* dict consulted by expat_default_handler to resolve
                         entity names */

    PyObject *names; /* dict caching raw (bytes) names -> decoded universal
                        names, see makeuniversal() */

    /* Callables invoked by the expat handlers when the target is not an
       exact TreeBuilder; each is tested for NULL before use. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    /* NOTE(review): the users of the three members below are outside this
       section; presumably they follow the same pattern -- confirm. */
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3083
3084 /* helpers */
3085
3086 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3087 makeuniversal(XMLParserObject* self, const char* string)
3088 {
3089 /* convert a UTF-8 tag/attribute name from the expat parser
3090 to a universal name string */
3091
3092 Py_ssize_t size = (Py_ssize_t) strlen(string);
3093 PyObject* key;
3094 PyObject* value;
3095
3096 /* look the 'raw' name up in the names dictionary */
3097 key = PyBytes_FromStringAndSize(string, size);
3098 if (!key)
3099 return NULL;
3100
3101 value = PyDict_GetItemWithError(self->names, key);
3102
3103 if (value) {
3104 Py_INCREF(value);
3105 }
3106 else if (!PyErr_Occurred()) {
3107 /* new name. convert to universal name, and decode as
3108 necessary */
3109
3110 PyObject* tag;
3111 char* p;
3112 Py_ssize_t i;
3113
3114 /* look for namespace separator */
3115 for (i = 0; i < size; i++)
3116 if (string[i] == '}')
3117 break;
3118 if (i != size) {
3119 /* convert to universal name */
3120 tag = PyBytes_FromStringAndSize(NULL, size+1);
3121 if (tag == NULL) {
3122 Py_DECREF(key);
3123 return NULL;
3124 }
3125 p = PyBytes_AS_STRING(tag);
3126 p[0] = '{';
3127 memcpy(p+1, string, size);
3128 size++;
3129 } else {
3130 /* plain name; use key as tag */
3131 Py_INCREF(key);
3132 tag = key;
3133 }
3134
3135 /* decode universal name */
3136 p = PyBytes_AS_STRING(tag);
3137 value = PyUnicode_DecodeUTF8(p, size, "strict");
3138 Py_DECREF(tag);
3139 if (!value) {
3140 Py_DECREF(key);
3141 return NULL;
3142 }
3143
3144 /* add to names dictionary */
3145 if (PyDict_SetItem(self->names, key, value) < 0) {
3146 Py_DECREF(key);
3147 Py_DECREF(value);
3148 return NULL;
3149 }
3150 }
3151
3152 Py_DECREF(key);
3153 return value;
3154 }
3155
3156 /* Set the ParseError exception with the given parameters.
3157 * If message is not NULL, it's used as the error string. Otherwise, the
3158 * message string is the default for the given error_code.
3159 */
3160 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3161 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3162 const char *message)
3163 {
3164 PyObject *errmsg, *error, *position, *code;
3165 elementtreestate *st = ET_STATE_GLOBAL;
3166
3167 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3168 message ? message : EXPAT(ErrorString)(error_code),
3169 line, column);
3170 if (errmsg == NULL)
3171 return;
3172
3173 error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3174 Py_DECREF(errmsg);
3175 if (!error)
3176 return;
3177
3178 /* Add code and position attributes */
3179 code = PyLong_FromLong((long)error_code);
3180 if (!code) {
3181 Py_DECREF(error);
3182 return;
3183 }
3184 if (PyObject_SetAttrString(error, "code", code) == -1) {
3185 Py_DECREF(error);
3186 Py_DECREF(code);
3187 return;
3188 }
3189 Py_DECREF(code);
3190
3191 position = Py_BuildValue("(nn)", line, column);
3192 if (!position) {
3193 Py_DECREF(error);
3194 return;
3195 }
3196 if (PyObject_SetAttrString(error, "position", position) == -1) {
3197 Py_DECREF(error);
3198 Py_DECREF(position);
3199 return;
3200 }
3201 Py_DECREF(position);
3202
3203 PyErr_SetObject(st->parseerror_obj, error);
3204 Py_DECREF(error);
3205 }
3206
3207 /* -------------------------------------------------------------------- */
3208 /* handlers */
3209
3210 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3211 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3212 int data_len)
3213 {
3214 PyObject* key;
3215 PyObject* value;
3216 PyObject* res;
3217
3218 if (data_len < 2 || data_in[0] != '&')
3219 return;
3220
3221 if (PyErr_Occurred())
3222 return;
3223
3224 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3225 if (!key)
3226 return;
3227
3228 value = PyDict_GetItemWithError(self->entity, key);
3229
3230 if (value) {
3231 if (TreeBuilder_CheckExact(self->target))
3232 res = treebuilder_handle_data(
3233 (TreeBuilderObject*) self->target, value
3234 );
3235 else if (self->handle_data)
3236 res = PyObject_CallOneArg(self->handle_data, value);
3237 else
3238 res = NULL;
3239 Py_XDECREF(res);
3240 } else if (!PyErr_Occurred()) {
3241 /* Report the first error, not the last */
3242 char message[128] = "undefined entity ";
3243 strncat(message, data_in, data_len < 100?data_len:100);
3244 expat_set_error(
3245 XML_ERROR_UNDEFINED_ENTITY,
3246 EXPAT(GetErrorLineNumber)(self->parser),
3247 EXPAT(GetErrorColumnNumber)(self->parser),
3248 message
3249 );
3250 }
3251
3252 Py_DECREF(key);
3253 }
3254
3255 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3256 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3257 const XML_Char **attrib_in)
3258 {
3259 PyObject* res;
3260 PyObject* tag;
3261 PyObject* attrib;
3262 int ok;
3263
3264 if (PyErr_Occurred())
3265 return;
3266
3267 /* tag name */
3268 tag = makeuniversal(self, tag_in);
3269 if (!tag)
3270 return; /* parser will look for errors */
3271
3272 /* attributes */
3273 if (attrib_in[0]) {
3274 attrib = PyDict_New();
3275 if (!attrib) {
3276 Py_DECREF(tag);
3277 return;
3278 }
3279 while (attrib_in[0] && attrib_in[1]) {
3280 PyObject* key = makeuniversal(self, attrib_in[0]);
3281 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3282 if (!key || !value) {
3283 Py_XDECREF(value);
3284 Py_XDECREF(key);
3285 Py_DECREF(attrib);
3286 Py_DECREF(tag);
3287 return;
3288 }
3289 ok = PyDict_SetItem(attrib, key, value);
3290 Py_DECREF(value);
3291 Py_DECREF(key);
3292 if (ok < 0) {
3293 Py_DECREF(attrib);
3294 Py_DECREF(tag);
3295 return;
3296 }
3297 attrib_in += 2;
3298 }
3299 } else {
3300 attrib = NULL;
3301 }
3302
3303 if (TreeBuilder_CheckExact(self->target)) {
3304 /* shortcut */
3305 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3306 tag, attrib);
3307 }
3308 else if (self->handle_start) {
3309 if (attrib == NULL) {
3310 attrib = PyDict_New();
3311 if (!attrib) {
3312 Py_DECREF(tag);
3313 return;
3314 }
3315 }
3316 res = PyObject_CallFunctionObjArgs(self->handle_start,
3317 tag, attrib, NULL);
3318 } else
3319 res = NULL;
3320
3321 Py_DECREF(tag);
3322 Py_XDECREF(attrib);
3323
3324 Py_XDECREF(res);
3325 }
3326
3327 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3328 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3329 int data_len)
3330 {
3331 PyObject* data;
3332 PyObject* res;
3333
3334 if (PyErr_Occurred())
3335 return;
3336
3337 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3338 if (!data)
3339 return; /* parser will look for errors */
3340
3341 if (TreeBuilder_CheckExact(self->target))
3342 /* shortcut */
3343 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3344 else if (self->handle_data)
3345 res = PyObject_CallOneArg(self->handle_data, data);
3346 else
3347 res = NULL;
3348
3349 Py_DECREF(data);
3350
3351 Py_XDECREF(res);
3352 }
3353
3354 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3355 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3356 {
3357 PyObject* tag;
3358 PyObject* res = NULL;
3359
3360 if (PyErr_Occurred())
3361 return;
3362
3363 if (TreeBuilder_CheckExact(self->target))
3364 /* shortcut */
3365 /* the standard tree builder doesn't look at the end tag */
3366 res = treebuilder_handle_end(
3367 (TreeBuilderObject*) self->target, Py_None
3368 );
3369 else if (self->handle_end) {
3370 tag = makeuniversal(self, tag_in);
3371 if (tag) {
3372 res = PyObject_CallOneArg(self->handle_end, tag);
3373 Py_DECREF(tag);
3374 }
3375 }
3376
3377 Py_XDECREF(res);
3378 }
3379
3380 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3381 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3382 const XML_Char *uri_in)
3383 {
3384 PyObject* res = NULL;
3385 PyObject* uri;
3386 PyObject* prefix;
3387 PyObject* stack[2];
3388
3389 if (PyErr_Occurred())
3390 return;
3391
3392 if (!uri_in)
3393 uri_in = "";
3394 if (!prefix_in)
3395 prefix_in = "";
3396
3397 if (TreeBuilder_CheckExact(self->target)) {
3398 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3399 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3400
3401 if (target->events_append && target->start_ns_event_obj) {
3402 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3403 if (!prefix)
3404 return;
3405 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3406 if (!uri) {
3407 Py_DECREF(prefix);
3408 return;
3409 }
3410
3411 res = treebuilder_handle_start_ns(target, prefix, uri);
3412 Py_DECREF(uri);
3413 Py_DECREF(prefix);
3414 }
3415 } else if (self->handle_start_ns) {
3416 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3417 if (!prefix)
3418 return;
3419 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3420 if (!uri) {
3421 Py_DECREF(prefix);
3422 return;
3423 }
3424
3425 stack[0] = prefix;
3426 stack[1] = uri;
3427 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3428 Py_DECREF(uri);
3429 Py_DECREF(prefix);
3430 }
3431
3432 Py_XDECREF(res);
3433 }
3434
3435 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3436 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3437 {
3438 PyObject *res = NULL;
3439 PyObject* prefix;
3440
3441 if (PyErr_Occurred())
3442 return;
3443
3444 if (!prefix_in)
3445 prefix_in = "";
3446
3447 if (TreeBuilder_CheckExact(self->target)) {
3448 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3449 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3450
3451 if (target->events_append && target->end_ns_event_obj) {
3452 res = treebuilder_handle_end_ns(target, Py_None);
3453 }
3454 } else if (self->handle_end_ns) {
3455 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3456 if (!prefix)
3457 return;
3458
3459 res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3460 Py_DECREF(prefix);
3461 }
3462
3463 Py_XDECREF(res);
3464 }
3465
3466 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3467 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3468 {
3469 PyObject* comment;
3470 PyObject* res;
3471
3472 if (PyErr_Occurred())
3473 return;
3474
3475 if (TreeBuilder_CheckExact(self->target)) {
3476 /* shortcut */
3477 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3478
3479 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3480 if (!comment)
3481 return; /* parser will look for errors */
3482
3483 res = treebuilder_handle_comment(target, comment);
3484 Py_XDECREF(res);
3485 Py_DECREF(comment);
3486 } else if (self->handle_comment) {
3487 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3488 if (!comment)
3489 return;
3490
3491 res = PyObject_CallOneArg(self->handle_comment, comment);
3492 Py_XDECREF(res);
3493 Py_DECREF(comment);
3494 }
3495 }
3496
3497 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3498 expat_start_doctype_handler(XMLParserObject *self,
3499 const XML_Char *doctype_name,
3500 const XML_Char *sysid,
3501 const XML_Char *pubid,
3502 int has_internal_subset)
3503 {
3504 _Py_IDENTIFIER(doctype);
3505 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3506 PyObject *res;
3507
3508 if (PyErr_Occurred())
3509 return;
3510
3511 doctype_name_obj = makeuniversal(self, doctype_name);
3512 if (!doctype_name_obj)
3513 return;
3514
3515 if (sysid) {
3516 sysid_obj = makeuniversal(self, sysid);
3517 if (!sysid_obj) {
3518 Py_DECREF(doctype_name_obj);
3519 return;
3520 }
3521 } else {
3522 Py_INCREF(Py_None);
3523 sysid_obj = Py_None;
3524 }
3525
3526 if (pubid) {
3527 pubid_obj = makeuniversal(self, pubid);
3528 if (!pubid_obj) {
3529 Py_DECREF(doctype_name_obj);
3530 Py_DECREF(sysid_obj);
3531 return;
3532 }
3533 } else {
3534 Py_INCREF(Py_None);
3535 pubid_obj = Py_None;
3536 }
3537
3538 /* If the target has a handler for doctype, call it. */
3539 if (self->handle_doctype) {
3540 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3541 doctype_name_obj, pubid_obj,
3542 sysid_obj, NULL);
3543 Py_XDECREF(res);
3544 }
3545 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3546 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3547 "The doctype() method of XMLParser is ignored. "
3548 "Define doctype() method on the TreeBuilder target.",
3549 1);
3550 Py_DECREF(res);
3551 }
3552
3553 Py_DECREF(doctype_name_obj);
3554 Py_DECREF(pubid_obj);
3555 Py_DECREF(sysid_obj);
3556 }
3557
3558 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3559 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3560 const XML_Char* data_in)
3561 {
3562 PyObject* pi_target;
3563 PyObject* data;
3564 PyObject* res;
3565 PyObject* stack[2];
3566
3567 if (PyErr_Occurred())
3568 return;
3569
3570 if (TreeBuilder_CheckExact(self->target)) {
3571 /* shortcut */
3572 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3573
3574 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3575 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3576 if (!pi_target)
3577 goto error;
3578 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3579 if (!data)
3580 goto error;
3581 res = treebuilder_handle_pi(target, pi_target, data);
3582 Py_XDECREF(res);
3583 Py_DECREF(data);
3584 Py_DECREF(pi_target);
3585 }
3586 } else if (self->handle_pi) {
3587 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3588 if (!pi_target)
3589 goto error;
3590 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3591 if (!data)
3592 goto error;
3593
3594 stack[0] = pi_target;
3595 stack[1] = data;
3596 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3597 Py_XDECREF(res);
3598 Py_DECREF(data);
3599 Py_DECREF(pi_target);
3600 }
3601
3602 return;
3603
3604 error:
3605 Py_XDECREF(pi_target);
3606 return;
3607 }
3608
3609 /* -------------------------------------------------------------------- */
3610
3611 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3612 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3613 {
3614 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3615 if (self) {
3616 self->parser = NULL;
3617 self->target = self->entity = self->names = NULL;
3618 self->handle_start_ns = self->handle_end_ns = NULL;
3619 self->handle_start = self->handle_data = self->handle_end = NULL;
3620 self->handle_comment = self->handle_pi = self->handle_close = NULL;
3621 self->handle_doctype = NULL;
3622 }
3623 return (PyObject *)self;
3624 }
3625
3626 static int
ignore_attribute_error(PyObject * value)3627 ignore_attribute_error(PyObject *value)
3628 {
3629 if (value == NULL) {
3630 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3631 return -1;
3632 }
3633 PyErr_Clear();
3634 }
3635 return 0;
3636 }
3637
3638 /*[clinic input]
3639 _elementtree.XMLParser.__init__
3640
3641 *
3642 target: object = NULL
3643 encoding: str(accept={str, NoneType}) = None
3644
3645 [clinic start generated code]*/
3646
3647 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * target,const char * encoding)3648 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3649 const char *encoding)
3650 /*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
3651 {
3652 self->entity = PyDict_New();
3653 if (!self->entity)
3654 return -1;
3655
3656 self->names = PyDict_New();
3657 if (!self->names) {
3658 Py_CLEAR(self->entity);
3659 return -1;
3660 }
3661
3662 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3663 if (!self->parser) {
3664 Py_CLEAR(self->entity);
3665 Py_CLEAR(self->names);
3666 PyErr_NoMemory();
3667 return -1;
3668 }
3669 /* expat < 2.1.0 has no XML_SetHashSalt() */
3670 if (EXPAT(SetHashSalt) != NULL) {
3671 EXPAT(SetHashSalt)(self->parser,
3672 (unsigned long)_Py_HashSecret.expat.hashsalt);
3673 }
3674
3675 if (target) {
3676 Py_INCREF(target);
3677 } else {
3678 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3679 if (!target) {
3680 Py_CLEAR(self->entity);
3681 Py_CLEAR(self->names);
3682 return -1;
3683 }
3684 }
3685 self->target = target;
3686
3687 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3688 if (ignore_attribute_error(self->handle_start_ns)) {
3689 return -1;
3690 }
3691 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3692 if (ignore_attribute_error(self->handle_end_ns)) {
3693 return -1;
3694 }
3695 self->handle_start = PyObject_GetAttrString(target, "start");
3696 if (ignore_attribute_error(self->handle_start)) {
3697 return -1;
3698 }
3699 self->handle_data = PyObject_GetAttrString(target, "data");
3700 if (ignore_attribute_error(self->handle_data)) {
3701 return -1;
3702 }
3703 self->handle_end = PyObject_GetAttrString(target, "end");
3704 if (ignore_attribute_error(self->handle_end)) {
3705 return -1;
3706 }
3707 self->handle_comment = PyObject_GetAttrString(target, "comment");
3708 if (ignore_attribute_error(self->handle_comment)) {
3709 return -1;
3710 }
3711 self->handle_pi = PyObject_GetAttrString(target, "pi");
3712 if (ignore_attribute_error(self->handle_pi)) {
3713 return -1;
3714 }
3715 self->handle_close = PyObject_GetAttrString(target, "close");
3716 if (ignore_attribute_error(self->handle_close)) {
3717 return -1;
3718 }
3719 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3720 if (ignore_attribute_error(self->handle_doctype)) {
3721 return -1;
3722 }
3723
3724 /* configure parser */
3725 EXPAT(SetUserData)(self->parser, self);
3726 if (self->handle_start_ns || self->handle_end_ns)
3727 EXPAT(SetNamespaceDeclHandler)(
3728 self->parser,
3729 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3730 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3731 );
3732 EXPAT(SetElementHandler)(
3733 self->parser,
3734 (XML_StartElementHandler) expat_start_handler,
3735 (XML_EndElementHandler) expat_end_handler
3736 );
3737 EXPAT(SetDefaultHandlerExpand)(
3738 self->parser,
3739 (XML_DefaultHandler) expat_default_handler
3740 );
3741 EXPAT(SetCharacterDataHandler)(
3742 self->parser,
3743 (XML_CharacterDataHandler) expat_data_handler
3744 );
3745 if (self->handle_comment)
3746 EXPAT(SetCommentHandler)(
3747 self->parser,
3748 (XML_CommentHandler) expat_comment_handler
3749 );
3750 if (self->handle_pi)
3751 EXPAT(SetProcessingInstructionHandler)(
3752 self->parser,
3753 (XML_ProcessingInstructionHandler) expat_pi_handler
3754 );
3755 EXPAT(SetStartDoctypeDeclHandler)(
3756 self->parser,
3757 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3758 );
3759 EXPAT(SetUnknownEncodingHandler)(
3760 self->parser,
3761 EXPAT(DefaultUnknownEncodingHandler), NULL
3762 );
3763
3764 return 0;
3765 }
3766
3767 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3768 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3769 {
3770 Py_VISIT(self->handle_close);
3771 Py_VISIT(self->handle_pi);
3772 Py_VISIT(self->handle_comment);
3773 Py_VISIT(self->handle_end);
3774 Py_VISIT(self->handle_data);
3775 Py_VISIT(self->handle_start);
3776 Py_VISIT(self->handle_start_ns);
3777 Py_VISIT(self->handle_end_ns);
3778 Py_VISIT(self->handle_doctype);
3779
3780 Py_VISIT(self->target);
3781 Py_VISIT(self->entity);
3782 Py_VISIT(self->names);
3783
3784 return 0;
3785 }
3786
3787 static int
xmlparser_gc_clear(XMLParserObject * self)3788 xmlparser_gc_clear(XMLParserObject *self)
3789 {
3790 if (self->parser != NULL) {
3791 XML_Parser parser = self->parser;
3792 self->parser = NULL;
3793 EXPAT(ParserFree)(parser);
3794 }
3795
3796 Py_CLEAR(self->handle_close);
3797 Py_CLEAR(self->handle_pi);
3798 Py_CLEAR(self->handle_comment);
3799 Py_CLEAR(self->handle_end);
3800 Py_CLEAR(self->handle_data);
3801 Py_CLEAR(self->handle_start);
3802 Py_CLEAR(self->handle_start_ns);
3803 Py_CLEAR(self->handle_end_ns);
3804 Py_CLEAR(self->handle_doctype);
3805
3806 Py_CLEAR(self->target);
3807 Py_CLEAR(self->entity);
3808 Py_CLEAR(self->names);
3809
3810 return 0;
3811 }
3812
3813 static void
xmlparser_dealloc(XMLParserObject * self)3814 xmlparser_dealloc(XMLParserObject* self)
3815 {
3816 PyObject_GC_UnTrack(self);
3817 xmlparser_gc_clear(self);
3818 Py_TYPE(self)->tp_free((PyObject *)self);
3819 }
3820
3821 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3822 _check_xmlparser(XMLParserObject* self)
3823 {
3824 if (self->target == NULL) {
3825 PyErr_SetString(PyExc_ValueError,
3826 "XMLParser.__init__() wasn't called");
3827 return 0;
3828 }
3829 return 1;
3830 }
3831
3832 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3833 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3834 {
3835 int ok;
3836
3837 assert(!PyErr_Occurred());
3838 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3839
3840 if (PyErr_Occurred())
3841 return NULL;
3842
3843 if (!ok) {
3844 expat_set_error(
3845 EXPAT(GetErrorCode)(self->parser),
3846 EXPAT(GetErrorLineNumber)(self->parser),
3847 EXPAT(GetErrorColumnNumber)(self->parser),
3848 NULL
3849 );
3850 return NULL;
3851 }
3852
3853 Py_RETURN_NONE;
3854 }
3855
3856 /*[clinic input]
3857 _elementtree.XMLParser.close
3858
3859 [clinic start generated code]*/
3860
3861 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3862 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3863 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3864 {
3865 /* end feeding data to parser */
3866
3867 PyObject* res;
3868
3869 if (!_check_xmlparser(self)) {
3870 return NULL;
3871 }
3872 res = expat_parse(self, "", 0, 1);
3873 if (!res)
3874 return NULL;
3875
3876 if (TreeBuilder_CheckExact(self->target)) {
3877 Py_DECREF(res);
3878 return treebuilder_done((TreeBuilderObject*) self->target);
3879 }
3880 else if (self->handle_close) {
3881 Py_DECREF(res);
3882 return PyObject_CallNoArgs(self->handle_close);
3883 }
3884 else {
3885 return res;
3886 }
3887 }
3888
3889 /*[clinic input]
3890 _elementtree.XMLParser.feed
3891
3892 data: object
3893 /
3894
3895 [clinic start generated code]*/
3896
3897 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3898 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3899 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3900 {
3901 /* feed data to parser */
3902
3903 if (!_check_xmlparser(self)) {
3904 return NULL;
3905 }
3906 if (PyUnicode_Check(data)) {
3907 Py_ssize_t data_len;
3908 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3909 if (data_ptr == NULL)
3910 return NULL;
3911 if (data_len > INT_MAX) {
3912 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3913 return NULL;
3914 }
3915 /* Explicitly set UTF-8 encoding. Return code ignored. */
3916 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3917 return expat_parse(self, data_ptr, (int)data_len, 0);
3918 }
3919 else {
3920 Py_buffer view;
3921 PyObject *res;
3922 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3923 return NULL;
3924 if (view.len > INT_MAX) {
3925 PyBuffer_Release(&view);
3926 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3927 return NULL;
3928 }
3929 res = expat_parse(self, view.buf, (int)view.len, 0);
3930 PyBuffer_Release(&view);
3931 return res;
3932 }
3933 }
3934
3935 /*[clinic input]
3936 _elementtree.XMLParser._parse_whole
3937
3938 file: object
3939 /
3940
3941 [clinic start generated code]*/
3942
3943 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3944 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3945 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3946 {
3947 /* (internal) parse the whole input, until end of stream */
3948 PyObject* reader;
3949 PyObject* buffer;
3950 PyObject* temp;
3951 PyObject* res;
3952
3953 if (!_check_xmlparser(self)) {
3954 return NULL;
3955 }
3956 reader = PyObject_GetAttrString(file, "read");
3957 if (!reader)
3958 return NULL;
3959
3960 /* read from open file object */
3961 for (;;) {
3962
3963 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3964
3965 if (!buffer) {
3966 /* read failed (e.g. due to KeyboardInterrupt) */
3967 Py_DECREF(reader);
3968 return NULL;
3969 }
3970
3971 if (PyUnicode_CheckExact(buffer)) {
3972 /* A unicode object is encoded into bytes using UTF-8 */
3973 if (PyUnicode_GET_LENGTH(buffer) == 0) {
3974 Py_DECREF(buffer);
3975 break;
3976 }
3977 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3978 Py_DECREF(buffer);
3979 if (!temp) {
3980 /* Propagate exception from PyUnicode_AsEncodedString */
3981 Py_DECREF(reader);
3982 return NULL;
3983 }
3984 buffer = temp;
3985 }
3986 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3987 Py_DECREF(buffer);
3988 break;
3989 }
3990
3991 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3992 Py_DECREF(buffer);
3993 Py_DECREF(reader);
3994 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3995 return NULL;
3996 }
3997 res = expat_parse(
3998 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3999 );
4000
4001 Py_DECREF(buffer);
4002
4003 if (!res) {
4004 Py_DECREF(reader);
4005 return NULL;
4006 }
4007 Py_DECREF(res);
4008
4009 }
4010
4011 Py_DECREF(reader);
4012
4013 res = expat_parse(self, "", 0, 1);
4014
4015 if (res && TreeBuilder_CheckExact(self->target)) {
4016 Py_DECREF(res);
4017 return treebuilder_done((TreeBuilderObject*) self->target);
4018 }
4019
4020 return res;
4021 }
4022
4023 /*[clinic input]
4024 _elementtree.XMLParser._setevents
4025
4026 events_queue: object
4027 events_to_report: object = None
4028 /
4029
4030 [clinic start generated code]*/
4031
4032 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4033 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4034 PyObject *events_queue,
4035 PyObject *events_to_report)
4036 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4037 {
4038 /* activate element event reporting */
4039 Py_ssize_t i;
4040 TreeBuilderObject *target;
4041 PyObject *events_append, *events_seq;
4042
4043 if (!_check_xmlparser(self)) {
4044 return NULL;
4045 }
4046 if (!TreeBuilder_CheckExact(self->target)) {
4047 PyErr_SetString(
4048 PyExc_TypeError,
4049 "event handling only supported for ElementTree.TreeBuilder "
4050 "targets"
4051 );
4052 return NULL;
4053 }
4054
4055 target = (TreeBuilderObject*) self->target;
4056
4057 events_append = PyObject_GetAttrString(events_queue, "append");
4058 if (events_append == NULL)
4059 return NULL;
4060 Py_XSETREF(target->events_append, events_append);
4061
4062 /* clear out existing events */
4063 Py_CLEAR(target->start_event_obj);
4064 Py_CLEAR(target->end_event_obj);
4065 Py_CLEAR(target->start_ns_event_obj);
4066 Py_CLEAR(target->end_ns_event_obj);
4067 Py_CLEAR(target->comment_event_obj);
4068 Py_CLEAR(target->pi_event_obj);
4069
4070 if (events_to_report == Py_None) {
4071 /* default is "end" only */
4072 target->end_event_obj = PyUnicode_FromString("end");
4073 Py_RETURN_NONE;
4074 }
4075
4076 if (!(events_seq = PySequence_Fast(events_to_report,
4077 "events must be a sequence"))) {
4078 return NULL;
4079 }
4080
4081 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4082 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4083 const char *event_name = NULL;
4084 if (PyUnicode_Check(event_name_obj)) {
4085 event_name = PyUnicode_AsUTF8(event_name_obj);
4086 } else if (PyBytes_Check(event_name_obj)) {
4087 event_name = PyBytes_AS_STRING(event_name_obj);
4088 }
4089 if (event_name == NULL) {
4090 Py_DECREF(events_seq);
4091 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4092 return NULL;
4093 }
4094
4095 Py_INCREF(event_name_obj);
4096 if (strcmp(event_name, "start") == 0) {
4097 Py_XSETREF(target->start_event_obj, event_name_obj);
4098 } else if (strcmp(event_name, "end") == 0) {
4099 Py_XSETREF(target->end_event_obj, event_name_obj);
4100 } else if (strcmp(event_name, "start-ns") == 0) {
4101 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4102 EXPAT(SetNamespaceDeclHandler)(
4103 self->parser,
4104 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4105 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4106 );
4107 } else if (strcmp(event_name, "end-ns") == 0) {
4108 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4109 EXPAT(SetNamespaceDeclHandler)(
4110 self->parser,
4111 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4112 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4113 );
4114 } else if (strcmp(event_name, "comment") == 0) {
4115 Py_XSETREF(target->comment_event_obj, event_name_obj);
4116 EXPAT(SetCommentHandler)(
4117 self->parser,
4118 (XML_CommentHandler) expat_comment_handler
4119 );
4120 } else if (strcmp(event_name, "pi") == 0) {
4121 Py_XSETREF(target->pi_event_obj, event_name_obj);
4122 EXPAT(SetProcessingInstructionHandler)(
4123 self->parser,
4124 (XML_ProcessingInstructionHandler) expat_pi_handler
4125 );
4126 } else {
4127 Py_DECREF(event_name_obj);
4128 Py_DECREF(events_seq);
4129 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4130 return NULL;
4131 }
4132 }
4133
4134 Py_DECREF(events_seq);
4135 Py_RETURN_NONE;
4136 }
4137
4138 static PyMemberDef xmlparser_members[] = {
4139 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4140 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4141 {NULL}
4142 };
4143
4144 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4145 xmlparser_version_getter(XMLParserObject *self, void *closure)
4146 {
4147 return PyUnicode_FromFormat(
4148 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4149 XML_MINOR_VERSION, XML_MICRO_VERSION);
4150 }
4151
4152 static PyGetSetDef xmlparser_getsetlist[] = {
4153 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4154 {NULL},
4155 };
4156
4157 #include "clinic/_elementtree.c.h"
4158
/* Method table for Element objects.  Every entry is a *_METHODDEF macro;
   presumably these come from the Argument Clinic header included just
   above -- their expansions are not visible here.  The table ends with a
   NULL sentinel. */
static PyMethodDef element_methods[] = {

    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF

    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF

    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF

    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF

    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF

    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF

    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF

    /* sentinel */
    {NULL, NULL}
};
4192
4193 static PyMappingMethods element_as_mapping = {
4194 (lenfunc) element_length,
4195 (binaryfunc) element_subscr,
4196 (objobjargproc) element_ass_subscr,
4197 };
4198
4199 static PyGetSetDef element_getsetlist[] = {
4200 {"tag",
4201 (getter)element_tag_getter,
4202 (setter)element_tag_setter,
4203 "A string identifying what kind of data this element represents"},
4204 {"text",
4205 (getter)element_text_getter,
4206 (setter)element_text_setter,
4207 "A string of text directly after the start tag, or None"},
4208 {"tail",
4209 (getter)element_tail_getter,
4210 (setter)element_tail_setter,
4211 "A string of text directly after the end tag, or None"},
4212 {"attrib",
4213 (getter)element_attrib_getter,
4214 (setter)element_attrib_setter,
4215 "A dictionary containing the element's attributes"},
4216 {NULL},
4217 };
4218
4219 static PyTypeObject Element_Type = {
4220 PyVarObject_HEAD_INIT(NULL, 0)
4221 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4222 /* methods */
4223 (destructor)element_dealloc, /* tp_dealloc */
4224 0, /* tp_vectorcall_offset */
4225 0, /* tp_getattr */
4226 0, /* tp_setattr */
4227 0, /* tp_as_async */
4228 (reprfunc)element_repr, /* tp_repr */
4229 0, /* tp_as_number */
4230 &element_as_sequence, /* tp_as_sequence */
4231 &element_as_mapping, /* tp_as_mapping */
4232 0, /* tp_hash */
4233 0, /* tp_call */
4234 0, /* tp_str */
4235 PyObject_GenericGetAttr, /* tp_getattro */
4236 0, /* tp_setattro */
4237 0, /* tp_as_buffer */
4238 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4239 /* tp_flags */
4240 0, /* tp_doc */
4241 (traverseproc)element_gc_traverse, /* tp_traverse */
4242 (inquiry)element_gc_clear, /* tp_clear */
4243 0, /* tp_richcompare */
4244 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4245 0, /* tp_iter */
4246 0, /* tp_iternext */
4247 element_methods, /* tp_methods */
4248 0, /* tp_members */
4249 element_getsetlist, /* tp_getset */
4250 0, /* tp_base */
4251 0, /* tp_dict */
4252 0, /* tp_descr_get */
4253 0, /* tp_descr_set */
4254 0, /* tp_dictoffset */
4255 (initproc)element_init, /* tp_init */
4256 PyType_GenericAlloc, /* tp_alloc */
4257 element_new, /* tp_new */
4258 0, /* tp_free */
4259 };
4260
/* Method table for TreeBuilder objects.  The *_METHODDEF macros are
   presumably provided by the Argument Clinic generated header; their
   expansions are not visible here. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    /* sentinel */
    {NULL, NULL}
};
4270
4271 static PyTypeObject TreeBuilder_Type = {
4272 PyVarObject_HEAD_INIT(NULL, 0)
4273 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4274 /* methods */
4275 (destructor)treebuilder_dealloc, /* tp_dealloc */
4276 0, /* tp_vectorcall_offset */
4277 0, /* tp_getattr */
4278 0, /* tp_setattr */
4279 0, /* tp_as_async */
4280 0, /* tp_repr */
4281 0, /* tp_as_number */
4282 0, /* tp_as_sequence */
4283 0, /* tp_as_mapping */
4284 0, /* tp_hash */
4285 0, /* tp_call */
4286 0, /* tp_str */
4287 0, /* tp_getattro */
4288 0, /* tp_setattro */
4289 0, /* tp_as_buffer */
4290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4291 /* tp_flags */
4292 0, /* tp_doc */
4293 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4294 (inquiry)treebuilder_gc_clear, /* tp_clear */
4295 0, /* tp_richcompare */
4296 0, /* tp_weaklistoffset */
4297 0, /* tp_iter */
4298 0, /* tp_iternext */
4299 treebuilder_methods, /* tp_methods */
4300 0, /* tp_members */
4301 0, /* tp_getset */
4302 0, /* tp_base */
4303 0, /* tp_dict */
4304 0, /* tp_descr_get */
4305 0, /* tp_descr_set */
4306 0, /* tp_dictoffset */
4307 _elementtree_TreeBuilder___init__, /* tp_init */
4308 PyType_GenericAlloc, /* tp_alloc */
4309 treebuilder_new, /* tp_new */
4310 0, /* tp_free */
4311 };
4312
/* Method table for XMLParser objects (feed, close, _parse_whole and
   _setevents, going by the macro names).  The *_METHODDEF macros are
   presumably provided by the Argument Clinic generated header; their
   expansions are not visible here. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    /* sentinel */
    {NULL, NULL}
};
4320
4321 static PyTypeObject XMLParser_Type = {
4322 PyVarObject_HEAD_INIT(NULL, 0)
4323 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4324 /* methods */
4325 (destructor)xmlparser_dealloc, /* tp_dealloc */
4326 0, /* tp_vectorcall_offset */
4327 0, /* tp_getattr */
4328 0, /* tp_setattr */
4329 0, /* tp_as_async */
4330 0, /* tp_repr */
4331 0, /* tp_as_number */
4332 0, /* tp_as_sequence */
4333 0, /* tp_as_mapping */
4334 0, /* tp_hash */
4335 0, /* tp_call */
4336 0, /* tp_str */
4337 0, /* tp_getattro */
4338 0, /* tp_setattro */
4339 0, /* tp_as_buffer */
4340 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4341 /* tp_flags */
4342 0, /* tp_doc */
4343 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4344 (inquiry)xmlparser_gc_clear, /* tp_clear */
4345 0, /* tp_richcompare */
4346 0, /* tp_weaklistoffset */
4347 0, /* tp_iter */
4348 0, /* tp_iternext */
4349 xmlparser_methods, /* tp_methods */
4350 xmlparser_members, /* tp_members */
4351 xmlparser_getsetlist, /* tp_getset */
4352 0, /* tp_base */
4353 0, /* tp_dict */
4354 0, /* tp_descr_get */
4355 0, /* tp_descr_set */
4356 0, /* tp_dictoffset */
4357 _elementtree_XMLParser___init__, /* tp_init */
4358 PyType_GenericAlloc, /* tp_alloc */
4359 xmlparser_new, /* tp_new */
4360 0, /* tp_free */
4361 };
4362
4363 /* ==================================================================== */
4364 /* python module interface */
4365
/* Module-level functions of _elementtree: SubElement() (accepts positional
   and keyword arguments) plus the entry supplied by the
   _ELEMENTTREE__SET_FACTORIES_METHODDEF macro, whose expansion is not
   visible here. */
static PyMethodDef _functions[] = {
    {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
    _ELEMENTTREE__SET_FACTORIES_METHODDEF
    /* sentinel */
    {NULL, NULL}
};
4371
4372
4373 static struct PyModuleDef elementtreemodule = {
4374 PyModuleDef_HEAD_INIT,
4375 "_elementtree",
4376 NULL,
4377 sizeof(elementtreestate),
4378 _functions,
4379 NULL,
4380 elementtree_traverse,
4381 elementtree_clear,
4382 elementtree_free
4383 };
4384
4385 PyMODINIT_FUNC
PyInit__elementtree(void)4386 PyInit__elementtree(void)
4387 {
4388 PyObject *m, *temp;
4389 elementtreestate *st;
4390
4391 m = PyState_FindModule(&elementtreemodule);
4392 if (m) {
4393 Py_INCREF(m);
4394 return m;
4395 }
4396
4397 /* Initialize object types */
4398 if (PyType_Ready(&ElementIter_Type) < 0)
4399 return NULL;
4400 if (PyType_Ready(&TreeBuilder_Type) < 0)
4401 return NULL;
4402 if (PyType_Ready(&Element_Type) < 0)
4403 return NULL;
4404 if (PyType_Ready(&XMLParser_Type) < 0)
4405 return NULL;
4406
4407 m = PyModule_Create(&elementtreemodule);
4408 if (!m)
4409 return NULL;
4410 st = get_elementtree_state(m);
4411
4412 if (!(temp = PyImport_ImportModule("copy")))
4413 return NULL;
4414 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4415 Py_XDECREF(temp);
4416
4417 if (st->deepcopy_obj == NULL) {
4418 return NULL;
4419 }
4420
4421 assert(!PyErr_Occurred());
4422 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4423 return NULL;
4424
4425 /* link against pyexpat */
4426 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4427 if (expat_capi) {
4428 /* check that it's usable */
4429 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4430 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4431 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4432 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4433 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4434 PyErr_SetString(PyExc_ImportError,
4435 "pyexpat version is incompatible");
4436 return NULL;
4437 }
4438 } else {
4439 return NULL;
4440 }
4441
4442 st->parseerror_obj = PyErr_NewException(
4443 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4444 );
4445 Py_INCREF(st->parseerror_obj);
4446 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4447 Py_DECREF(st->parseerror_obj);
4448 return NULL;
4449 }
4450
4451 PyTypeObject *types[] = {
4452 &Element_Type,
4453 &TreeBuilder_Type,
4454 &XMLParser_Type
4455 };
4456
4457 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4458 if (PyModule_AddType(m, types[i]) < 0) {
4459 return NULL;
4460 }
4461 }
4462
4463 return m;
4464 }
4465