1 /*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * info@pythonware.com
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14 #define PY_SSIZE_T_CLEAN
15
16 #include "Python.h"
17 #include "structmember.h"
18
19 /* -------------------------------------------------------------------- */
20 /* configuration */
21
22 /* An element can hold this many children without extra memory
23 allocations. */
24 #define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31 /* Also note that pymalloc always allocates blocks in multiples of
32 eight bytes. For the current C version of ElementTree, this means
33 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36 /* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change "#if 0" to "#if 1" to have
   ALLOC/RELEASE maintain a running byte count and print it together
   with the given comment.  In the default build both macros expand to
   nothing, so their arguments are never evaluated. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* Compiler tweaks: LOCAL(type) introduces a file-local definition of
   the given type.  MSVC builds additionally request inlining and the
   __fastcall calling convention. */
#ifdef _MSC_VER
#  define LOCAL(type) static __inline type __fastcall
#else
#  define LOCAL(type) static type
#endif
55
/* Macros that keep a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit removed
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p combined with `flag` */
#define JOIN_OBJ(p) ((PyObject*) (((uintptr_t) (p)) & ~(uintptr_t)1))
#define JOIN_GET(p) (((uintptr_t) (p)) & 1)
#define JOIN_SET(p, flag) ((void*) (((uintptr_t) (JOIN_OBJ(p))) | (flag)))
63
64 /* Py_SETREF for a PyObject* that uses a join flag. */
65 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)66 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67 {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71 }
72
73 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75 */
_clear_joined_ptr(PyObject ** p)76 static void _clear_joined_ptr(PyObject **p)
77 {
78 if (*p) {
79 _set_joined_ptr(p, NULL);
80 }
81 }
82
83 /* Types defined by this extension */
84 static PyTypeObject Element_Type;
85 static PyTypeObject ElementIter_Type;
86 static PyTypeObject TreeBuilder_Type;
87 static PyTypeObject XMLParser_Type;
88
89
90 /* Per-module state; PEP 3121 */
91 typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95 } elementtreestate;
96
97 static struct PyModuleDef elementtreemodule;
98
99 /* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102 #define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104 /* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107 #define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110 static int
elementtree_clear(PyObject * m)111 elementtree_clear(PyObject *m)
112 {
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118 }
119
120 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)121 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122 {
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128 }
129
130 static void
elementtree_free(void * m)131 elementtree_free(void *m)
132 {
133 elementtree_clear((PyObject *)m);
134 }
135
136 /* helpers */
137
138 LOCAL(PyObject*)
list_join(PyObject * list)139 list_join(PyObject* list)
140 {
141 /* join list elements */
142 PyObject* joiner;
143 PyObject* result;
144
145 joiner = PyUnicode_FromStringAndSize("", 0);
146 if (!joiner)
147 return NULL;
148 result = PyUnicode_Join(joiner, list);
149 Py_DECREF(joiner);
150 return result;
151 }
152
153 /* Is the given object an empty dictionary?
154 */
155 static int
is_empty_dict(PyObject * obj)156 is_empty_dict(PyObject *obj)
157 {
158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
159 }
160
161
162 /* -------------------------------------------------------------------- */
163 /* the Element type */
164
165 typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
178
179 } ElementObjectExtra;
180
181 typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
203 } ElementObject;
204
205
/* Element_CheckExact: op is exactly of type Element_Type.
   Element_Check: op is an Element_Type instance or an instance of a
   subtype (as decided by PyObject_TypeCheck). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
209
210 /* -------------------------------------------------------------------- */
211 /* Element constructors and destructor */
212
213 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)214 create_extra(ElementObject* self, PyObject* attrib)
215 {
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
217 if (!self->extra) {
218 PyErr_NoMemory();
219 return -1;
220 }
221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233 }
234
235 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)236 dealloc_extra(ElementObjectExtra *extra)
237 {
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252 }
253
254 LOCAL(void)
clear_extra(ElementObject * self)255 clear_extra(ElementObject* self)
256 {
257 ElementObjectExtra *myextra;
258
259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
264 myextra = self->extra;
265 self->extra = NULL;
266
267 dealloc_extra(myextra);
268 }
269
270 /* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272 */
273 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)274 create_new_element(PyObject* tag, PyObject* attrib)
275 {
276 ElementObject* self;
277
278 self = PyObject_GC_New(ElementObject, &Element_Type);
279 if (self == NULL)
280 return NULL;
281 self->extra = NULL;
282
283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
292 self->weakreflist = NULL;
293
294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
299 Py_DECREF(self);
300 return NULL;
301 }
302 }
303
304 return (PyObject*) self;
305 }
306
307 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)308 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309 {
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
322 e->weakreflist = NULL;
323 }
324 return (PyObject *)e;
325 }
326
327 /* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
329 * either accept attrib as a keyword argument or all attributes splashed
330 * directly into *kwds.
331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
334 */
335 static PyObject*
get_attrib_from_keywords(PyObject * kwds)336 get_attrib_from_keywords(PyObject *kwds)
337 {
338 PyObject *attrib_str = PyUnicode_FromString("attrib");
339 if (attrib_str == NULL) {
340 return NULL;
341 }
342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
349 Py_DECREF(attrib_str);
350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
359 } else {
360 attrib = PyDict_New();
361 }
362
363 Py_DECREF(attrib_str);
364
365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366 Py_DECREF(attrib);
367 return NULL;
368 }
369 return attrib;
370 }
371
372 /*[clinic input]
373 module _elementtree
374 class _elementtree.Element "ElementObject *" "&Element_Type"
375 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377 [clinic start generated code]*/
378 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379
380 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)381 element_init(PyObject *self, PyObject *args, PyObject *kwds)
382 {
383 PyObject *tag;
384 PyObject *attrib = NULL;
385 ElementObject *self_elem;
386
387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388 return -1;
389
390 if (attrib) {
391 /* attrib passed as positional arg */
392 attrib = PyDict_Copy(attrib);
393 if (!attrib)
394 return -1;
395 if (kwds) {
396 if (PyDict_Update(attrib, kwds) < 0) {
397 Py_DECREF(attrib);
398 return -1;
399 }
400 }
401 } else if (kwds) {
402 /* have keywords args */
403 attrib = get_attrib_from_keywords(kwds);
404 if (!attrib)
405 return -1;
406 }
407
408 self_elem = (ElementObject *)self;
409
410 if (attrib != NULL && !is_empty_dict(attrib)) {
411 if (create_extra(self_elem, attrib) < 0) {
412 Py_DECREF(attrib);
413 return -1;
414 }
415 }
416
417 /* We own a reference to attrib here and it's no longer needed. */
418 Py_XDECREF(attrib);
419
420 /* Replace the objects already pointed to by tag, text and tail. */
421 Py_INCREF(tag);
422 Py_XSETREF(self_elem->tag, tag);
423
424 Py_INCREF(Py_None);
425 _set_joined_ptr(&self_elem->text, Py_None);
426
427 Py_INCREF(Py_None);
428 _set_joined_ptr(&self_elem->tail, Py_None);
429
430 return 0;
431 }
432
433 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)434 element_resize(ElementObject* self, Py_ssize_t extra)
435 {
436 Py_ssize_t size;
437 PyObject* *children;
438
439 assert(extra >= 0);
440 /* make sure self->children can hold the given number of extra
441 elements. set an exception and return -1 if allocation failed */
442
443 if (!self->extra) {
444 if (create_extra(self, NULL) < 0)
445 return -1;
446 }
447
448 size = self->extra->length + extra; /* never overflows */
449
450 if (size > self->extra->allocated) {
451 /* use Python 2.4's list growth strategy */
452 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
454 * which needs at least 4 bytes.
455 * Although it's a false alarm always assume at least one child to
456 * be safe.
457 */
458 size = size ? size : 1;
459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460 goto nomemory;
461 if (self->extra->children != self->extra->_children) {
462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
463 * "children", which needs at least 4 bytes. Although it's a
464 * false alarm always assume at least one child to be safe.
465 */
466 children = PyObject_Realloc(self->extra->children,
467 size * sizeof(PyObject*));
468 if (!children)
469 goto nomemory;
470 } else {
471 children = PyObject_Malloc(size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 /* copy existing children from static area to malloc buffer */
475 memcpy(children, self->extra->children,
476 self->extra->length * sizeof(PyObject*));
477 }
478 self->extra->children = children;
479 self->extra->allocated = size;
480 }
481
482 return 0;
483
484 nomemory:
485 PyErr_NoMemory();
486 return -1;
487 }
488
489 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)490 element_add_subelement(ElementObject* self, PyObject* element)
491 {
492 /* add a child element to a parent */
493
494 if (element_resize(self, 1) < 0)
495 return -1;
496
497 Py_INCREF(element);
498 self->extra->children[self->extra->length] = element;
499
500 self->extra->length++;
501
502 return 0;
503 }
504
505 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)506 element_get_attrib(ElementObject* self)
507 {
508 /* return borrowed reference to attrib dictionary */
509 /* note: this function assumes that the extra section exists */
510
511 PyObject* res = self->extra->attrib;
512
513 if (res == Py_None) {
514 /* create missing dictionary */
515 res = PyDict_New();
516 if (!res)
517 return NULL;
518 Py_DECREF(Py_None);
519 self->extra->attrib = res;
520 }
521
522 return res;
523 }
524
525 LOCAL(PyObject*)
element_get_text(ElementObject * self)526 element_get_text(ElementObject* self)
527 {
528 /* return borrowed reference to text attribute */
529
530 PyObject *res = self->text;
531
532 if (JOIN_GET(res)) {
533 res = JOIN_OBJ(res);
534 if (PyList_CheckExact(res)) {
535 PyObject *tmp = list_join(res);
536 if (!tmp)
537 return NULL;
538 self->text = tmp;
539 Py_DECREF(res);
540 res = tmp;
541 }
542 }
543
544 return res;
545 }
546
547 LOCAL(PyObject*)
element_get_tail(ElementObject * self)548 element_get_tail(ElementObject* self)
549 {
550 /* return borrowed reference to text attribute */
551
552 PyObject *res = self->tail;
553
554 if (JOIN_GET(res)) {
555 res = JOIN_OBJ(res);
556 if (PyList_CheckExact(res)) {
557 PyObject *tmp = list_join(res);
558 if (!tmp)
559 return NULL;
560 self->tail = tmp;
561 Py_DECREF(res);
562 res = tmp;
563 }
564 }
565
566 return res;
567 }
568
569 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)570 subelement(PyObject *self, PyObject *args, PyObject *kwds)
571 {
572 PyObject* elem;
573
574 ElementObject* parent;
575 PyObject* tag;
576 PyObject* attrib = NULL;
577 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
578 &Element_Type, &parent, &tag,
579 &PyDict_Type, &attrib)) {
580 return NULL;
581 }
582
583 if (attrib) {
584 /* attrib passed as positional arg */
585 attrib = PyDict_Copy(attrib);
586 if (!attrib)
587 return NULL;
588 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
589 Py_DECREF(attrib);
590 return NULL;
591 }
592 } else if (kwds) {
593 /* have keyword args */
594 attrib = get_attrib_from_keywords(kwds);
595 if (!attrib)
596 return NULL;
597 } else {
598 /* no attrib arg, no kwds, so no attribute */
599 Py_INCREF(Py_None);
600 attrib = Py_None;
601 }
602
603 elem = create_new_element(tag, attrib);
604 Py_DECREF(attrib);
605 if (elem == NULL)
606 return NULL;
607
608 if (element_add_subelement(parent, elem) < 0) {
609 Py_DECREF(elem);
610 return NULL;
611 }
612
613 return elem;
614 }
615
616 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)617 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
618 {
619 Py_VISIT(self->tag);
620 Py_VISIT(JOIN_OBJ(self->text));
621 Py_VISIT(JOIN_OBJ(self->tail));
622
623 if (self->extra) {
624 Py_ssize_t i;
625 Py_VISIT(self->extra->attrib);
626
627 for (i = 0; i < self->extra->length; ++i)
628 Py_VISIT(self->extra->children[i]);
629 }
630 return 0;
631 }
632
633 static int
element_gc_clear(ElementObject * self)634 element_gc_clear(ElementObject *self)
635 {
636 Py_CLEAR(self->tag);
637 _clear_joined_ptr(&self->text);
638 _clear_joined_ptr(&self->tail);
639
640 /* After dropping all references from extra, it's no longer valid anyway,
641 * so fully deallocate it.
642 */
643 clear_extra(self);
644 return 0;
645 }
646
647 static void
element_dealloc(ElementObject * self)648 element_dealloc(ElementObject* self)
649 {
650 /* bpo-31095: UnTrack is needed before calling any callbacks */
651 PyObject_GC_UnTrack(self);
652 Py_TRASHCAN_SAFE_BEGIN(self)
653
654 if (self->weakreflist != NULL)
655 PyObject_ClearWeakRefs((PyObject *) self);
656
657 /* element_gc_clear clears all references and deallocates extra
658 */
659 element_gc_clear(self);
660
661 RELEASE(sizeof(ElementObject), "destroy element");
662 Py_TYPE(self)->tp_free((PyObject *)self);
663 Py_TRASHCAN_SAFE_END(self)
664 }
665
666 /* -------------------------------------------------------------------- */
667
668 /*[clinic input]
669 _elementtree.Element.append
670
671 subelement: object(subclass_of='&Element_Type')
672 /
673
674 [clinic start generated code]*/
675
676 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)677 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
678 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
679 {
680 if (element_add_subelement(self, subelement) < 0)
681 return NULL;
682
683 Py_RETURN_NONE;
684 }
685
686 /*[clinic input]
687 _elementtree.Element.clear
688
689 [clinic start generated code]*/
690
691 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)692 _elementtree_Element_clear_impl(ElementObject *self)
693 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
694 {
695 clear_extra(self);
696
697 Py_INCREF(Py_None);
698 _set_joined_ptr(&self->text, Py_None);
699
700 Py_INCREF(Py_None);
701 _set_joined_ptr(&self->tail, Py_None);
702
703 Py_RETURN_NONE;
704 }
705
706 /*[clinic input]
707 _elementtree.Element.__copy__
708
709 [clinic start generated code]*/
710
711 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)712 _elementtree_Element___copy___impl(ElementObject *self)
713 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
714 {
715 Py_ssize_t i;
716 ElementObject* element;
717
718 element = (ElementObject*) create_new_element(
719 self->tag, (self->extra) ? self->extra->attrib : Py_None);
720 if (!element)
721 return NULL;
722
723 Py_INCREF(JOIN_OBJ(self->text));
724 _set_joined_ptr(&element->text, self->text);
725
726 Py_INCREF(JOIN_OBJ(self->tail));
727 _set_joined_ptr(&element->tail, self->tail);
728
729 assert(!element->extra || !element->extra->length);
730 if (self->extra) {
731 if (element_resize(element, self->extra->length) < 0) {
732 Py_DECREF(element);
733 return NULL;
734 }
735
736 for (i = 0; i < self->extra->length; i++) {
737 Py_INCREF(self->extra->children[i]);
738 element->extra->children[i] = self->extra->children[i];
739 }
740
741 assert(!element->extra->length);
742 element->extra->length = self->extra->length;
743 }
744
745 return (PyObject*) element;
746 }
747
748 /* Helper for a deep copy. */
749 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
750
751 /*[clinic input]
752 _elementtree.Element.__deepcopy__
753
754 memo: object(subclass_of="&PyDict_Type")
755 /
756
757 [clinic start generated code]*/
758
759 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)760 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
761 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
762 {
763 Py_ssize_t i;
764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
786 element = (ElementObject*) create_new_element(tag, attrib);
787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
793
794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
798
799 tail = deepcopy(JOIN_OBJ(self->tail), memo);
800 if (!tail)
801 goto error;
802 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
803
804 assert(!element->extra || !element->extra->length);
805 if (self->extra) {
806 if (element_resize(element, self->extra->length) < 0)
807 goto error;
808
809 for (i = 0; i < self->extra->length; i++) {
810 PyObject* child = deepcopy(self->extra->children[i], memo);
811 if (!child) {
812 element->extra->length = i;
813 goto error;
814 }
815 element->extra->children[i] = child;
816 }
817
818 assert(!element->extra->length);
819 element->extra->length = self->extra->length;
820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
823 id = PyLong_FromSsize_t((uintptr_t) self);
824 if (!id)
825 goto error;
826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839 }
840
841 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)842 deepcopy(PyObject *object, PyObject *memo)
843 {
844 /* do a deep copy of the given object */
845 elementtreestate *st;
846 PyObject *stack[2];
847
848 /* Fast paths */
849 if (object == Py_None || PyUnicode_CheckExact(object)) {
850 Py_INCREF(object);
851 return object;
852 }
853
854 if (Py_REFCNT(object) == 1) {
855 if (PyDict_CheckExact(object)) {
856 PyObject *key, *value;
857 Py_ssize_t pos = 0;
858 int simple = 1;
859 while (PyDict_Next(object, &pos, &key, &value)) {
860 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
861 simple = 0;
862 break;
863 }
864 }
865 if (simple)
866 return PyDict_Copy(object);
867 /* Fall through to general case */
868 }
869 else if (Element_CheckExact(object)) {
870 return _elementtree_Element___deepcopy___impl(
871 (ElementObject *)object, memo);
872 }
873 }
874
875 /* General case */
876 st = ET_STATE_GLOBAL;
877 if (!st->deepcopy_obj) {
878 PyErr_SetString(PyExc_RuntimeError,
879 "deepcopy helper not found");
880 return NULL;
881 }
882
883 stack[0] = object;
884 stack[1] = memo;
885 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
886 }
887
888
889 /*[clinic input]
890 _elementtree.Element.__sizeof__ -> Py_ssize_t
891
892 [clinic start generated code]*/
893
894 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)895 _elementtree_Element___sizeof___impl(ElementObject *self)
896 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
897 {
898 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
899 if (self->extra) {
900 result += sizeof(ElementObjectExtra);
901 if (self->extra->children != self->extra->_children)
902 result += sizeof(PyObject*) * self->extra->allocated;
903 }
904 return result;
905 }
906
/* Keys of the state dictionary exchanged by __getstate__ and
   __setstate__. */
#define PICKLED_TAG       "tag"
#define PICKLED_CHILDREN  "_children"
#define PICKLED_ATTRIB    "attrib"
#define PICKLED_TAIL      "tail"
#define PICKLED_TEXT      "text"
913
914 /* __getstate__ returns a fabricated instance dict as in the pure-Python
915 * Element implementation, for interoperability/interchangeability. This
916 * makes the pure-Python implementation details an API, but (a) there aren't
917 * any unnecessary structures there; and (b) it buys compatibility with 3.2
918 * pickles. See issue #16076.
919 */
920 /*[clinic input]
921 _elementtree.Element.__getstate__
922
923 [clinic start generated code]*/
924
925 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)926 _elementtree_Element___getstate___impl(ElementObject *self)
927 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
928 {
929 Py_ssize_t i;
930 PyObject *children, *attrib;
931
932 /* Build a list of children. */
933 children = PyList_New(self->extra ? self->extra->length : 0);
934 if (!children)
935 return NULL;
936 for (i = 0; i < PyList_GET_SIZE(children); i++) {
937 PyObject *child = self->extra->children[i];
938 Py_INCREF(child);
939 PyList_SET_ITEM(children, i, child);
940 }
941
942 if (self->extra && self->extra->attrib != Py_None) {
943 attrib = self->extra->attrib;
944 Py_INCREF(attrib);
945 }
946 else {
947 attrib = PyDict_New();
948 if (!attrib) {
949 Py_DECREF(children);
950 return NULL;
951 }
952 }
953
954 return Py_BuildValue("{sOsNsNsOsO}",
955 PICKLED_TAG, self->tag,
956 PICKLED_CHILDREN, children,
957 PICKLED_ATTRIB, attrib,
958 PICKLED_TEXT, JOIN_OBJ(self->text),
959 PICKLED_TAIL, JOIN_OBJ(self->tail));
960 }
961
962 static PyObject *
element_setstate_from_attributes(ElementObject * self,PyObject * tag,PyObject * attrib,PyObject * text,PyObject * tail,PyObject * children)963 element_setstate_from_attributes(ElementObject *self,
964 PyObject *tag,
965 PyObject *attrib,
966 PyObject *text,
967 PyObject *tail,
968 PyObject *children)
969 {
970 Py_ssize_t i, nchildren;
971 ElementObjectExtra *oldextra = NULL;
972
973 if (!tag) {
974 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
975 return NULL;
976 }
977
978 Py_INCREF(tag);
979 Py_XSETREF(self->tag, tag);
980
981 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
982 Py_INCREF(JOIN_OBJ(text));
983 _set_joined_ptr(&self->text, text);
984
985 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
986 Py_INCREF(JOIN_OBJ(tail));
987 _set_joined_ptr(&self->tail, tail);
988
989 /* Handle ATTRIB and CHILDREN. */
990 if (!children && !attrib) {
991 Py_RETURN_NONE;
992 }
993
994 /* Compute 'nchildren'. */
995 if (children) {
996 if (!PyList_Check(children)) {
997 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
998 return NULL;
999 }
1000 nchildren = PyList_GET_SIZE(children);
1001
1002 /* (Re-)allocate 'extra'.
1003 Avoid DECREFs calling into this code again (cycles, etc.)
1004 */
1005 oldextra = self->extra;
1006 self->extra = NULL;
1007 if (element_resize(self, nchildren)) {
1008 assert(!self->extra || !self->extra->length);
1009 clear_extra(self);
1010 self->extra = oldextra;
1011 return NULL;
1012 }
1013 assert(self->extra);
1014 assert(self->extra->allocated >= nchildren);
1015 if (oldextra) {
1016 assert(self->extra->attrib == Py_None);
1017 self->extra->attrib = oldextra->attrib;
1018 oldextra->attrib = Py_None;
1019 }
1020
1021 /* Copy children */
1022 for (i = 0; i < nchildren; i++) {
1023 self->extra->children[i] = PyList_GET_ITEM(children, i);
1024 Py_INCREF(self->extra->children[i]);
1025 }
1026
1027 assert(!self->extra->length);
1028 self->extra->length = nchildren;
1029 }
1030 else {
1031 if (element_resize(self, 0)) {
1032 return NULL;
1033 }
1034 }
1035
1036 /* Stash attrib. */
1037 if (attrib) {
1038 Py_INCREF(attrib);
1039 Py_XSETREF(self->extra->attrib, attrib);
1040 }
1041 dealloc_extra(oldextra);
1042
1043 Py_RETURN_NONE;
1044 }
1045
1046 /* __setstate__ for Element instance from the Python implementation.
1047 * 'state' should be the instance dict.
1048 */
1049
1050 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1051 element_setstate_from_Python(ElementObject *self, PyObject *state)
1052 {
1053 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1054 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1055 PyObject *args;
1056 PyObject *tag, *attrib, *text, *tail, *children;
1057 PyObject *retval;
1058
1059 tag = attrib = text = tail = children = NULL;
1060 args = PyTuple_New(0);
1061 if (!args)
1062 return NULL;
1063
1064 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1065 &attrib, &text, &tail, &children))
1066 retval = element_setstate_from_attributes(self, tag, attrib, text,
1067 tail, children);
1068 else
1069 retval = NULL;
1070
1071 Py_DECREF(args);
1072 return retval;
1073 }
1074
1075 /*[clinic input]
1076 _elementtree.Element.__setstate__
1077
1078 state: object
1079 /
1080
1081 [clinic start generated code]*/
1082
1083 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1084 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1085 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1086 {
1087 if (!PyDict_CheckExact(state)) {
1088 PyErr_Format(PyExc_TypeError,
1089 "Don't know how to unpickle \"%.200R\" as an Element",
1090 state);
1091 return NULL;
1092 }
1093 else
1094 return element_setstate_from_Python(self, state);
1095 }
1096
1097 LOCAL(int)
checkpath(PyObject * tag)1098 checkpath(PyObject* tag)
1099 {
1100 Py_ssize_t i;
1101 int check = 1;
1102
1103 /* check if a tag contains an xpath character */
1104
1105 #define PATHCHAR(ch) \
1106 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1107
1108 if (PyUnicode_Check(tag)) {
1109 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1110 void *data = PyUnicode_DATA(tag);
1111 unsigned int kind = PyUnicode_KIND(tag);
1112 for (i = 0; i < len; i++) {
1113 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1114 if (ch == '{')
1115 check = 0;
1116 else if (ch == '}')
1117 check = 1;
1118 else if (check && PATHCHAR(ch))
1119 return 1;
1120 }
1121 return 0;
1122 }
1123 if (PyBytes_Check(tag)) {
1124 char *p = PyBytes_AS_STRING(tag);
1125 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
1126 if (p[i] == '{')
1127 check = 0;
1128 else if (p[i] == '}')
1129 check = 1;
1130 else if (check && PATHCHAR(p[i]))
1131 return 1;
1132 }
1133 return 0;
1134 }
1135
1136 return 1; /* unknown type; might be path expression */
1137 }
1138
1139 /*[clinic input]
1140 _elementtree.Element.extend
1141
1142 elements: object
1143 /
1144
1145 [clinic start generated code]*/
1146
1147 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1148 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1149 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1150 {
1151 PyObject* seq;
1152 Py_ssize_t i;
1153
1154 seq = PySequence_Fast(elements, "");
1155 if (!seq) {
1156 PyErr_Format(
1157 PyExc_TypeError,
1158 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1159 );
1160 return NULL;
1161 }
1162
1163 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1164 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1165 Py_INCREF(element);
1166 if (!Element_Check(element)) {
1167 PyErr_Format(
1168 PyExc_TypeError,
1169 "expected an Element, not \"%.200s\"",
1170 Py_TYPE(element)->tp_name);
1171 Py_DECREF(seq);
1172 Py_DECREF(element);
1173 return NULL;
1174 }
1175
1176 if (element_add_subelement(self, element) < 0) {
1177 Py_DECREF(seq);
1178 Py_DECREF(element);
1179 return NULL;
1180 }
1181 Py_DECREF(element);
1182 }
1183
1184 Py_DECREF(seq);
1185
1186 Py_RETURN_NONE;
1187 }
1188
1189 /*[clinic input]
1190 _elementtree.Element.find
1191
1192 path: object
1193 namespaces: object = None
1194
1195 [clinic start generated code]*/
1196
1197 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1198 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1199 PyObject *namespaces)
1200 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1201 {
1202 Py_ssize_t i;
1203 elementtreestate *st = ET_STATE_GLOBAL;
1204
1205 if (checkpath(path) || namespaces != Py_None) {
1206 _Py_IDENTIFIER(find);
1207 return _PyObject_CallMethodIdObjArgs(
1208 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1209 );
1210 }
1211
1212 if (!self->extra)
1213 Py_RETURN_NONE;
1214
1215 for (i = 0; i < self->extra->length; i++) {
1216 PyObject* item = self->extra->children[i];
1217 int rc;
1218 if (!Element_Check(item))
1219 continue;
1220 Py_INCREF(item);
1221 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1222 if (rc > 0)
1223 return item;
1224 Py_DECREF(item);
1225 if (rc < 0)
1226 return NULL;
1227 }
1228
1229 Py_RETURN_NONE;
1230 }
1231
1232 /*[clinic input]
1233 _elementtree.Element.findtext
1234
1235 path: object
1236 default: object = None
1237 namespaces: object = None
1238
1239 [clinic start generated code]*/
1240
1241 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1242 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1243 PyObject *default_value,
1244 PyObject *namespaces)
1245 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1246 {
1247 Py_ssize_t i;
1248 _Py_IDENTIFIER(findtext);
1249 elementtreestate *st = ET_STATE_GLOBAL;
1250
1251 if (checkpath(path) || namespaces != Py_None)
1252 return _PyObject_CallMethodIdObjArgs(
1253 st->elementpath_obj, &PyId_findtext,
1254 self, path, default_value, namespaces, NULL
1255 );
1256
1257 if (!self->extra) {
1258 Py_INCREF(default_value);
1259 return default_value;
1260 }
1261
1262 for (i = 0; i < self->extra->length; i++) {
1263 PyObject *item = self->extra->children[i];
1264 int rc;
1265 if (!Element_Check(item))
1266 continue;
1267 Py_INCREF(item);
1268 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1269 if (rc > 0) {
1270 PyObject* text = element_get_text((ElementObject*)item);
1271 if (text == Py_None) {
1272 Py_DECREF(item);
1273 return PyUnicode_New(0, 0);
1274 }
1275 Py_XINCREF(text);
1276 Py_DECREF(item);
1277 return text;
1278 }
1279 Py_DECREF(item);
1280 if (rc < 0)
1281 return NULL;
1282 }
1283
1284 Py_INCREF(default_value);
1285 return default_value;
1286 }
1287
1288 /*[clinic input]
1289 _elementtree.Element.findall
1290
1291 path: object
1292 namespaces: object = None
1293
1294 [clinic start generated code]*/
1295
1296 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1297 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1298 PyObject *namespaces)
1299 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1300 {
1301 Py_ssize_t i;
1302 PyObject* out;
1303 elementtreestate *st = ET_STATE_GLOBAL;
1304
1305 if (checkpath(path) || namespaces != Py_None) {
1306 _Py_IDENTIFIER(findall);
1307 return _PyObject_CallMethodIdObjArgs(
1308 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1309 );
1310 }
1311
1312 out = PyList_New(0);
1313 if (!out)
1314 return NULL;
1315
1316 if (!self->extra)
1317 return out;
1318
1319 for (i = 0; i < self->extra->length; i++) {
1320 PyObject* item = self->extra->children[i];
1321 int rc;
1322 if (!Element_Check(item))
1323 continue;
1324 Py_INCREF(item);
1325 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1326 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1327 Py_DECREF(item);
1328 Py_DECREF(out);
1329 return NULL;
1330 }
1331 Py_DECREF(item);
1332 }
1333
1334 return out;
1335 }
1336
1337 /*[clinic input]
1338 _elementtree.Element.iterfind
1339
1340 path: object
1341 namespaces: object = None
1342
1343 [clinic start generated code]*/
1344
1345 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1346 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1347 PyObject *namespaces)
1348 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1349 {
1350 PyObject* tag = path;
1351 _Py_IDENTIFIER(iterfind);
1352 elementtreestate *st = ET_STATE_GLOBAL;
1353
1354 return _PyObject_CallMethodIdObjArgs(
1355 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1356 }
1357
1358 /*[clinic input]
1359 _elementtree.Element.get
1360
1361 key: object
1362 default: object = None
1363
1364 [clinic start generated code]*/
1365
1366 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1367 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1368 PyObject *default_value)
1369 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1370 {
1371 PyObject* value;
1372
1373 if (!self->extra || self->extra->attrib == Py_None)
1374 value = default_value;
1375 else {
1376 value = PyDict_GetItem(self->extra->attrib, key);
1377 if (!value)
1378 value = default_value;
1379 }
1380
1381 Py_INCREF(value);
1382 return value;
1383 }
1384
1385 /*[clinic input]
1386 _elementtree.Element.getchildren
1387
1388 [clinic start generated code]*/
1389
1390 static PyObject *
_elementtree_Element_getchildren_impl(ElementObject * self)1391 _elementtree_Element_getchildren_impl(ElementObject *self)
1392 /*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
1393 {
1394 Py_ssize_t i;
1395 PyObject* list;
1396
1397 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1398 "This method will be removed in future versions. "
1399 "Use 'list(elem)' or iteration over elem instead.",
1400 1) < 0) {
1401 return NULL;
1402 }
1403
1404 if (!self->extra)
1405 return PyList_New(0);
1406
1407 list = PyList_New(self->extra->length);
1408 if (!list)
1409 return NULL;
1410
1411 for (i = 0; i < self->extra->length; i++) {
1412 PyObject* item = self->extra->children[i];
1413 Py_INCREF(item);
1414 PyList_SET_ITEM(list, i, item);
1415 }
1416
1417 return list;
1418 }
1419
1420
/* Forward declaration: the definition follows the ElementIter_Type object
   further down, but the iter()/itertext() methods below already call it. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1423
1424
1425 /*[clinic input]
1426 _elementtree.Element.iter
1427
1428 tag: object = None
1429
1430 [clinic start generated code]*/
1431
1432 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1433 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1434 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1435 {
1436 if (PyUnicode_Check(tag)) {
1437 if (PyUnicode_READY(tag) < 0)
1438 return NULL;
1439 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1440 tag = Py_None;
1441 }
1442 else if (PyBytes_Check(tag)) {
1443 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1444 tag = Py_None;
1445 }
1446
1447 return create_elementiter(self, tag, 0);
1448 }
1449
1450
1451 /*[clinic input]
1452 _elementtree.Element.getiterator
1453
1454 tag: object = None
1455
1456 [clinic start generated code]*/
1457
1458 static PyObject *
_elementtree_Element_getiterator_impl(ElementObject * self,PyObject * tag)1459 _elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1460 /*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1461 {
1462 /* Change for a DeprecationWarning in 1.4 */
1463 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1464 "This method will be removed in future versions. "
1465 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1466 1) < 0) {
1467 return NULL;
1468 }
1469 return _elementtree_Element_iter_impl(self, tag);
1470 }
1471
1472
1473 /*[clinic input]
1474 _elementtree.Element.itertext
1475
1476 [clinic start generated code]*/
1477
1478 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1479 _elementtree_Element_itertext_impl(ElementObject *self)
1480 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1481 {
1482 return create_elementiter(self, Py_None, 1);
1483 }
1484
1485
1486 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1487 element_getitem(PyObject* self_, Py_ssize_t index)
1488 {
1489 ElementObject* self = (ElementObject*) self_;
1490
1491 if (!self->extra || index < 0 || index >= self->extra->length) {
1492 PyErr_SetString(
1493 PyExc_IndexError,
1494 "child index out of range"
1495 );
1496 return NULL;
1497 }
1498
1499 Py_INCREF(self->extra->children[index]);
1500 return self->extra->children[index];
1501 }
1502
1503 /*[clinic input]
1504 _elementtree.Element.insert
1505
1506 index: Py_ssize_t
1507 subelement: object(subclass_of='&Element_Type')
1508 /
1509
1510 [clinic start generated code]*/
1511
1512 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1513 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1514 PyObject *subelement)
1515 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1516 {
1517 Py_ssize_t i;
1518
1519 if (!self->extra) {
1520 if (create_extra(self, NULL) < 0)
1521 return NULL;
1522 }
1523
1524 if (index < 0) {
1525 index += self->extra->length;
1526 if (index < 0)
1527 index = 0;
1528 }
1529 if (index > self->extra->length)
1530 index = self->extra->length;
1531
1532 if (element_resize(self, 1) < 0)
1533 return NULL;
1534
1535 for (i = self->extra->length; i > index; i--)
1536 self->extra->children[i] = self->extra->children[i-1];
1537
1538 Py_INCREF(subelement);
1539 self->extra->children[index] = subelement;
1540
1541 self->extra->length++;
1542
1543 Py_RETURN_NONE;
1544 }
1545
1546 /*[clinic input]
1547 _elementtree.Element.items
1548
1549 [clinic start generated code]*/
1550
1551 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1552 _elementtree_Element_items_impl(ElementObject *self)
1553 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1554 {
1555 if (!self->extra || self->extra->attrib == Py_None)
1556 return PyList_New(0);
1557
1558 return PyDict_Items(self->extra->attrib);
1559 }
1560
1561 /*[clinic input]
1562 _elementtree.Element.keys
1563
1564 [clinic start generated code]*/
1565
1566 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1567 _elementtree_Element_keys_impl(ElementObject *self)
1568 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1569 {
1570 if (!self->extra || self->extra->attrib == Py_None)
1571 return PyList_New(0);
1572
1573 return PyDict_Keys(self->extra->attrib);
1574 }
1575
1576 static Py_ssize_t
element_length(ElementObject * self)1577 element_length(ElementObject* self)
1578 {
1579 if (!self->extra)
1580 return 0;
1581
1582 return self->extra->length;
1583 }
1584
1585 /*[clinic input]
1586 _elementtree.Element.makeelement
1587
1588 tag: object
1589 attrib: object
1590 /
1591
1592 [clinic start generated code]*/
1593
1594 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1595 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1596 PyObject *attrib)
1597 /*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
1598 {
1599 PyObject* elem;
1600
1601 attrib = PyDict_Copy(attrib);
1602 if (!attrib)
1603 return NULL;
1604
1605 elem = create_new_element(tag, attrib);
1606
1607 Py_DECREF(attrib);
1608
1609 return elem;
1610 }
1611
1612 /*[clinic input]
1613 _elementtree.Element.remove
1614
1615 subelement: object(subclass_of='&Element_Type')
1616 /
1617
1618 [clinic start generated code]*/
1619
1620 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1621 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1622 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1623 {
1624 Py_ssize_t i;
1625 int rc;
1626 PyObject *found;
1627
1628 if (!self->extra) {
1629 /* element has no children, so raise exception */
1630 PyErr_SetString(
1631 PyExc_ValueError,
1632 "list.remove(x): x not in list"
1633 );
1634 return NULL;
1635 }
1636
1637 for (i = 0; i < self->extra->length; i++) {
1638 if (self->extra->children[i] == subelement)
1639 break;
1640 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1641 if (rc > 0)
1642 break;
1643 if (rc < 0)
1644 return NULL;
1645 }
1646
1647 if (i >= self->extra->length) {
1648 /* subelement is not in children, so raise exception */
1649 PyErr_SetString(
1650 PyExc_ValueError,
1651 "list.remove(x): x not in list"
1652 );
1653 return NULL;
1654 }
1655
1656 found = self->extra->children[i];
1657
1658 self->extra->length--;
1659 for (; i < self->extra->length; i++)
1660 self->extra->children[i] = self->extra->children[i+1];
1661
1662 Py_DECREF(found);
1663 Py_RETURN_NONE;
1664 }
1665
1666 static PyObject*
element_repr(ElementObject * self)1667 element_repr(ElementObject* self)
1668 {
1669 int status;
1670
1671 if (self->tag == NULL)
1672 return PyUnicode_FromFormat("<Element at %p>", self);
1673
1674 status = Py_ReprEnter((PyObject *)self);
1675 if (status == 0) {
1676 PyObject *res;
1677 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1678 Py_ReprLeave((PyObject *)self);
1679 return res;
1680 }
1681 if (status > 0)
1682 PyErr_Format(PyExc_RuntimeError,
1683 "reentrant call inside %s.__repr__",
1684 Py_TYPE(self)->tp_name);
1685 return NULL;
1686 }
1687
1688 /*[clinic input]
1689 _elementtree.Element.set
1690
1691 key: object
1692 value: object
1693 /
1694
1695 [clinic start generated code]*/
1696
1697 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1698 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1699 PyObject *value)
1700 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1701 {
1702 PyObject* attrib;
1703
1704 if (!self->extra) {
1705 if (create_extra(self, NULL) < 0)
1706 return NULL;
1707 }
1708
1709 attrib = element_get_attrib(self);
1710 if (!attrib)
1711 return NULL;
1712
1713 if (PyDict_SetItem(attrib, key, value) < 0)
1714 return NULL;
1715
1716 Py_RETURN_NONE;
1717 }
1718
1719 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1720 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1721 {
1722 ElementObject* self = (ElementObject*) self_;
1723 Py_ssize_t i;
1724 PyObject* old;
1725
1726 if (!self->extra || index < 0 || index >= self->extra->length) {
1727 PyErr_SetString(
1728 PyExc_IndexError,
1729 "child assignment index out of range");
1730 return -1;
1731 }
1732
1733 old = self->extra->children[index];
1734
1735 if (item) {
1736 Py_INCREF(item);
1737 self->extra->children[index] = item;
1738 } else {
1739 self->extra->length--;
1740 for (i = index; i < self->extra->length; i++)
1741 self->extra->children[i] = self->extra->children[i+1];
1742 }
1743
1744 Py_DECREF(old);
1745
1746 return 0;
1747 }
1748
1749 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1750 element_subscr(PyObject* self_, PyObject* item)
1751 {
1752 ElementObject* self = (ElementObject*) self_;
1753
1754 if (PyIndex_Check(item)) {
1755 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1756
1757 if (i == -1 && PyErr_Occurred()) {
1758 return NULL;
1759 }
1760 if (i < 0 && self->extra)
1761 i += self->extra->length;
1762 return element_getitem(self_, i);
1763 }
1764 else if (PySlice_Check(item)) {
1765 Py_ssize_t start, stop, step, slicelen, i;
1766 size_t cur;
1767 PyObject* list;
1768
1769 if (!self->extra)
1770 return PyList_New(0);
1771
1772 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1773 return NULL;
1774 }
1775 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1776 step);
1777
1778 if (slicelen <= 0)
1779 return PyList_New(0);
1780 else {
1781 list = PyList_New(slicelen);
1782 if (!list)
1783 return NULL;
1784
1785 for (cur = start, i = 0; i < slicelen;
1786 cur += step, i++) {
1787 PyObject* item = self->extra->children[cur];
1788 Py_INCREF(item);
1789 PyList_SET_ITEM(list, i, item);
1790 }
1791
1792 return list;
1793 }
1794 }
1795 else {
1796 PyErr_SetString(PyExc_TypeError,
1797 "element indices must be integers");
1798 return NULL;
1799 }
1800 }
1801
1802 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1803 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1804 {
1805 ElementObject* self = (ElementObject*) self_;
1806
1807 if (PyIndex_Check(item)) {
1808 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1809
1810 if (i == -1 && PyErr_Occurred()) {
1811 return -1;
1812 }
1813 if (i < 0 && self->extra)
1814 i += self->extra->length;
1815 return element_setitem(self_, i, value);
1816 }
1817 else if (PySlice_Check(item)) {
1818 Py_ssize_t start, stop, step, slicelen, newlen, i;
1819 size_t cur;
1820
1821 PyObject* recycle = NULL;
1822 PyObject* seq;
1823
1824 if (!self->extra) {
1825 if (create_extra(self, NULL) < 0)
1826 return -1;
1827 }
1828
1829 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1830 return -1;
1831 }
1832 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1833 step);
1834
1835 if (value == NULL) {
1836 /* Delete slice */
1837 size_t cur;
1838 Py_ssize_t i;
1839
1840 if (slicelen <= 0)
1841 return 0;
1842
1843 /* Since we're deleting, the direction of the range doesn't matter,
1844 * so for simplicity make it always ascending.
1845 */
1846 if (step < 0) {
1847 stop = start + 1;
1848 start = stop + step * (slicelen - 1) - 1;
1849 step = -step;
1850 }
1851
1852 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1853
1854 /* recycle is a list that will contain all the children
1855 * scheduled for removal.
1856 */
1857 if (!(recycle = PyList_New(slicelen))) {
1858 return -1;
1859 }
1860
1861 /* This loop walks over all the children that have to be deleted,
1862 * with cur pointing at them. num_moved is the amount of children
1863 * until the next deleted child that have to be "shifted down" to
1864 * occupy the deleted's places.
1865 * Note that in the ith iteration, shifting is done i+i places down
1866 * because i children were already removed.
1867 */
1868 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1869 /* Compute how many children have to be moved, clipping at the
1870 * list end.
1871 */
1872 Py_ssize_t num_moved = step - 1;
1873 if (cur + step >= (size_t)self->extra->length) {
1874 num_moved = self->extra->length - cur - 1;
1875 }
1876
1877 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1878
1879 memmove(
1880 self->extra->children + cur - i,
1881 self->extra->children + cur + 1,
1882 num_moved * sizeof(PyObject *));
1883 }
1884
1885 /* Leftover "tail" after the last removed child */
1886 cur = start + (size_t)slicelen * step;
1887 if (cur < (size_t)self->extra->length) {
1888 memmove(
1889 self->extra->children + cur - slicelen,
1890 self->extra->children + cur,
1891 (self->extra->length - cur) * sizeof(PyObject *));
1892 }
1893
1894 self->extra->length -= slicelen;
1895
1896 /* Discard the recycle list with all the deleted sub-elements */
1897 Py_DECREF(recycle);
1898 return 0;
1899 }
1900
1901 /* A new slice is actually being assigned */
1902 seq = PySequence_Fast(value, "");
1903 if (!seq) {
1904 PyErr_Format(
1905 PyExc_TypeError,
1906 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1907 );
1908 return -1;
1909 }
1910 newlen = PySequence_Fast_GET_SIZE(seq);
1911
1912 if (step != 1 && newlen != slicelen)
1913 {
1914 Py_DECREF(seq);
1915 PyErr_Format(PyExc_ValueError,
1916 "attempt to assign sequence of size %zd "
1917 "to extended slice of size %zd",
1918 newlen, slicelen
1919 );
1920 return -1;
1921 }
1922
1923 /* Resize before creating the recycle bin, to prevent refleaks. */
1924 if (newlen > slicelen) {
1925 if (element_resize(self, newlen - slicelen) < 0) {
1926 Py_DECREF(seq);
1927 return -1;
1928 }
1929 }
1930
1931 if (slicelen > 0) {
1932 /* to avoid recursive calls to this method (via decref), move
1933 old items to the recycle bin here, and get rid of them when
1934 we're done modifying the element */
1935 recycle = PyList_New(slicelen);
1936 if (!recycle) {
1937 Py_DECREF(seq);
1938 return -1;
1939 }
1940 for (cur = start, i = 0; i < slicelen;
1941 cur += step, i++)
1942 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1943 }
1944
1945 if (newlen < slicelen) {
1946 /* delete slice */
1947 for (i = stop; i < self->extra->length; i++)
1948 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1949 } else if (newlen > slicelen) {
1950 /* insert slice */
1951 for (i = self->extra->length-1; i >= stop; i--)
1952 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1953 }
1954
1955 /* replace the slice */
1956 for (cur = start, i = 0; i < newlen;
1957 cur += step, i++) {
1958 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1959 Py_INCREF(element);
1960 self->extra->children[cur] = element;
1961 }
1962
1963 self->extra->length += newlen - slicelen;
1964
1965 Py_DECREF(seq);
1966
1967 /* discard the recycle bin, and everything in it */
1968 Py_XDECREF(recycle);
1969
1970 return 0;
1971 }
1972 else {
1973 PyErr_SetString(PyExc_TypeError,
1974 "element indices must be integers");
1975 return -1;
1976 }
1977 }
1978
1979 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1980 element_tag_getter(ElementObject *self, void *closure)
1981 {
1982 PyObject *res = self->tag;
1983 Py_INCREF(res);
1984 return res;
1985 }
1986
1987 static PyObject*
element_text_getter(ElementObject * self,void * closure)1988 element_text_getter(ElementObject *self, void *closure)
1989 {
1990 PyObject *res = element_get_text(self);
1991 Py_XINCREF(res);
1992 return res;
1993 }
1994
1995 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1996 element_tail_getter(ElementObject *self, void *closure)
1997 {
1998 PyObject *res = element_get_tail(self);
1999 Py_XINCREF(res);
2000 return res;
2001 }
2002
2003 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)2004 element_attrib_getter(ElementObject *self, void *closure)
2005 {
2006 PyObject *res;
2007 if (!self->extra) {
2008 if (create_extra(self, NULL) < 0)
2009 return NULL;
2010 }
2011 res = element_get_attrib(self);
2012 Py_XINCREF(res);
2013 return res;
2014 }
2015
/* Macro for setter validation: a NULL value means "delete the attribute",
   which the setters refuse with AttributeError by returning -1 from the
   enclosing function.  Wrapped in do { } while (0) so that the expansion
   is a single statement and cannot capture a following `else`; every use
   must end with a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2024
2025 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)2026 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2027 {
2028 _VALIDATE_ATTR_VALUE(value);
2029 Py_INCREF(value);
2030 Py_SETREF(self->tag, value);
2031 return 0;
2032 }
2033
2034 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2035 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2036 {
2037 _VALIDATE_ATTR_VALUE(value);
2038 Py_INCREF(value);
2039 _set_joined_ptr(&self->text, value);
2040 return 0;
2041 }
2042
2043 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2044 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2045 {
2046 _VALIDATE_ATTR_VALUE(value);
2047 Py_INCREF(value);
2048 _set_joined_ptr(&self->tail, value);
2049 return 0;
2050 }
2051
2052 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2053 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2054 {
2055 _VALIDATE_ATTR_VALUE(value);
2056 if (!self->extra) {
2057 if (create_extra(self, NULL) < 0)
2058 return -1;
2059 }
2060 Py_INCREF(value);
2061 Py_SETREF(self->extra->attrib, value);
2062 return 0;
2063 }
2064
/* Sequence protocol slots for the Element type: length, integer indexing
   and integer item assignment/deletion.  The remaining slots are unused. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,           /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                    /* sq_item */
    0,                                  /* was_sq_slice */
    element_setitem,                    /* sq_ass_item */
    0,                                  /* was_sq_ass_slice */
};
2074
2075 /******************************* Element iterator ****************************/
2076
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack owns a reference to it */
    Py_ssize_t child_index;     /* index of the next child of `parent` to
                                   visit */
} ParentLocator;
2089
/* Instance layout of the element iterator created by create_elementiter(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries (PyMem) */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element iteration starts from; set to
                                       NULL once it has been handed out */
    PyObject *sought_tag;           /* tag to match; Py_None matches all */
    int gettext;                    /* nonzero: yield text, not elements */
} ElementIterObject;
2099
2100
2101 static void
elementiter_dealloc(ElementIterObject * it)2102 elementiter_dealloc(ElementIterObject *it)
2103 {
2104 Py_ssize_t i = it->parent_stack_used;
2105 it->parent_stack_used = 0;
2106 /* bpo-31095: UnTrack is needed before calling any callbacks */
2107 PyObject_GC_UnTrack(it);
2108 while (i--)
2109 Py_XDECREF(it->parent_stack[i].parent);
2110 PyMem_Free(it->parent_stack);
2111
2112 Py_XDECREF(it->sought_tag);
2113 Py_XDECREF(it->root_element);
2114
2115 PyObject_GC_Del(it);
2116 }
2117
2118 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2119 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2120 {
2121 Py_ssize_t i = it->parent_stack_used;
2122 while (i--)
2123 Py_VISIT(it->parent_stack[i].parent);
2124
2125 Py_VISIT(it->root_element);
2126 Py_VISIT(it->sought_tag);
2127 return 0;
2128 }
2129
2130 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2131 */
2132 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2133 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2134 {
2135 ParentLocator *item;
2136
2137 if (it->parent_stack_used >= it->parent_stack_size) {
2138 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2139 ParentLocator *parent_stack = it->parent_stack;
2140 PyMem_Resize(parent_stack, ParentLocator, new_size);
2141 if (parent_stack == NULL)
2142 return -1;
2143 it->parent_stack = parent_stack;
2144 it->parent_stack_size = new_size;
2145 }
2146 item = it->parent_stack + it->parent_stack_used++;
2147 Py_INCREF(parent);
2148 item->parent = parent;
2149 item->child_index = 0;
2150 return 0;
2151 }
2152
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Returns a new reference to the next element (or text object), or NULL
     * with an exception set: StopIteration when exhausted, otherwise the
     * error that occurred.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: from here on elem carries the reference that the
                 * stack entry owned; every path below releases it.
                 */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            /* A child that is not an Element cannot be descended into.
             * elem is still owned by the stack here, so nothing is released.
             */
            if (!Element_Check(extra->children[child_index])) {
                PyErr_Format(PyExc_AttributeError,
                             "'%.100s' object has no attribute 'iter'",
                             Py_TYPE(extra->children[child_index])->tp_name);
                return NULL;
            }
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* elem is an owned reference at this point; the push takes an
         * additional reference of its own for the stack entry.
         */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter(): a None tag matches every element */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Shared tail of the itertext() paths: `text` is the text or tail
         * just fetched for elem (NULL signals an error from the helper),
         * and elem is an owned reference that is released on every branch.
         */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* take our own reference before elem is released */
            Py_INCREF(text);
            Py_DECREF(elem);
            /* only objects that test true are yielded */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* not reached: the loop above only exits through return */
    return NULL;
}
2263
2264
/* Type object of the element iterator.  It takes part in cyclic garbage
   collection (Py_TPFLAGS_HAVE_GC with a traverse function), is its own
   iterator (PyObject_SelfIter) and produces items via elementiter_next.
   No tp_new is set; instances are created by create_elementiter(). */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2308
2309 #define INIT_PARENT_STACK_SIZE 8
2310
2311 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2312 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2313 {
2314 ElementIterObject *it;
2315
2316 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2317 if (!it)
2318 return NULL;
2319
2320 Py_INCREF(tag);
2321 it->sought_tag = tag;
2322 it->gettext = gettext;
2323 Py_INCREF(self);
2324 it->root_element = self;
2325
2326 PyObject_GC_Track(it);
2327
2328 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2329 if (it->parent_stack == NULL) {
2330 Py_DECREF(it);
2331 PyErr_NoMemory();
2332 return NULL;
2333 }
2334 it->parent_stack_used = 0;
2335 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2336
2337 return (PyObject *)it;
2338 }
2339
2340
2341 /* ==================================================================== */
2342 /* the tree builder type */
2343
/* Instance layout of the C TreeBuilder.  All PyObject* members are owned
   references (or NULL where noted); they are released in
   treebuilder_gc_clear(). */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node), or NULL */

    PyObject *this; /* current node (Py_None when no element is open) */
    PyObject *last; /* most recently created or closed node (Py_None
                       initially) */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes; NULL or
                                  Py_None selects create_new_element() */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* True only for instances whose type is exactly TreeBuilder_Type (subclasses
   do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
2368
2369 /* -------------------------------------------------------------------- */
2370 /* constructor and destructor */
2371
2372 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2373 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2374 {
2375 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2376 if (t != NULL) {
2377 t->root = NULL;
2378
2379 Py_INCREF(Py_None);
2380 t->this = Py_None;
2381 Py_INCREF(Py_None);
2382 t->last = Py_None;
2383
2384 t->data = NULL;
2385 t->element_factory = NULL;
2386 t->stack = PyList_New(20);
2387 if (!t->stack) {
2388 Py_DECREF(t->this);
2389 Py_DECREF(t->last);
2390 Py_DECREF((PyObject *) t);
2391 return NULL;
2392 }
2393 t->index = 0;
2394
2395 t->events_append = NULL;
2396 t->start_event_obj = t->end_event_obj = NULL;
2397 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2398 }
2399 return (PyObject *)t;
2400 }
2401
2402 /*[clinic input]
2403 _elementtree.TreeBuilder.__init__
2404
2405 element_factory: object = NULL
2406
2407 [clinic start generated code]*/
2408
2409 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory)2410 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2411 PyObject *element_factory)
2412 /*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2413 {
2414 if (element_factory) {
2415 Py_INCREF(element_factory);
2416 Py_XSETREF(self->element_factory, element_factory);
2417 }
2418
2419 return 0;
2420 }
2421
/* tp_traverse for TreeBuilder: report every object reference held by the
   instance to the cycle collector.  Py_VISIT skips NULL members and returns
   early with the visitor's result if it is non-zero; otherwise 0 is
   returned. */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    /* event tracing */
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    Py_VISIT(self->element_factory);
    return 0;
}
2438
/* tp_clear for TreeBuilder: drop every object reference held by the
   instance.  Py_CLEAR sets each member to NULL before releasing it, so the
   function is safe to call more than once (it is also used by
   treebuilder_dealloc()).  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event tracing */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
2455
/* tp_dealloc for TreeBuilder. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    /* Stop GC tracking first so the collector cannot see the object while
       its members are being released. */
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2463
2464 /* -------------------------------------------------------------------- */
2465 /* helpers for handling of arbitrary element-like objects */
2466
2467 static int
treebuilder_set_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2468 treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
2469 PyObject **dest, _Py_Identifier *name)
2470 {
2471 if (Element_CheckExact(element)) {
2472 PyObject *tmp = JOIN_OBJ(*dest);
2473 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2474 *data = NULL;
2475 Py_DECREF(tmp);
2476 return 0;
2477 }
2478 else {
2479 PyObject *joined = list_join(*data);
2480 int r;
2481 if (joined == NULL)
2482 return -1;
2483 r = _PyObject_SetAttrId(element, name, joined);
2484 Py_DECREF(joined);
2485 if (r < 0)
2486 return -1;
2487 Py_CLEAR(*data);
2488 return 0;
2489 }
2490 }
2491
2492 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2493 treebuilder_flush_data(TreeBuilderObject* self)
2494 {
2495 PyObject *element = self->last;
2496
2497 if (!self->data) {
2498 return 0;
2499 }
2500
2501 if (self->this == element) {
2502 _Py_IDENTIFIER(text);
2503 return treebuilder_set_element_text_or_tail(
2504 element, &self->data,
2505 &((ElementObject *) element)->text, &PyId_text);
2506 }
2507 else {
2508 _Py_IDENTIFIER(tail);
2509 return treebuilder_set_element_text_or_tail(
2510 element, &self->data,
2511 &((ElementObject *) element)->tail, &PyId_tail);
2512 }
2513 }
2514
2515 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2516 treebuilder_add_subelement(PyObject *element, PyObject *child)
2517 {
2518 _Py_IDENTIFIER(append);
2519 if (Element_CheckExact(element)) {
2520 ElementObject *elem = (ElementObject *) element;
2521 return element_add_subelement(elem, child);
2522 }
2523 else {
2524 PyObject *res;
2525 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
2526 if (res == NULL)
2527 return -1;
2528 Py_DECREF(res);
2529 return 0;
2530 }
2531 }
2532
2533 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2534 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2535 PyObject *node)
2536 {
2537 if (action != NULL) {
2538 PyObject *res;
2539 PyObject *event = PyTuple_Pack(2, action, node);
2540 if (event == NULL)
2541 return -1;
2542 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2543 Py_DECREF(event);
2544 if (res == NULL)
2545 return -1;
2546 Py_DECREF(res);
2547 }
2548 return 0;
2549 }
2550
2551 /* -------------------------------------------------------------------- */
2552 /* handlers */
2553
2554 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2555 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2556 PyObject* attrib)
2557 {
2558 PyObject* node;
2559 PyObject* this;
2560 elementtreestate *st = ET_STATE_GLOBAL;
2561
2562 if (treebuilder_flush_data(self) < 0) {
2563 return NULL;
2564 }
2565
2566 if (!self->element_factory || self->element_factory == Py_None) {
2567 node = create_new_element(tag, attrib);
2568 } else if (attrib == Py_None) {
2569 attrib = PyDict_New();
2570 if (!attrib)
2571 return NULL;
2572 node = PyObject_CallFunctionObjArgs(self->element_factory,
2573 tag, attrib, NULL);
2574 Py_DECREF(attrib);
2575 }
2576 else {
2577 node = PyObject_CallFunctionObjArgs(self->element_factory,
2578 tag, attrib, NULL);
2579 }
2580 if (!node) {
2581 return NULL;
2582 }
2583
2584 this = self->this;
2585
2586 if (this != Py_None) {
2587 if (treebuilder_add_subelement(this, node) < 0)
2588 goto error;
2589 } else {
2590 if (self->root) {
2591 PyErr_SetString(
2592 st->parseerror_obj,
2593 "multiple elements on top level"
2594 );
2595 goto error;
2596 }
2597 Py_INCREF(node);
2598 self->root = node;
2599 }
2600
2601 if (self->index < PyList_GET_SIZE(self->stack)) {
2602 if (PyList_SetItem(self->stack, self->index, this) < 0)
2603 goto error;
2604 Py_INCREF(this);
2605 } else {
2606 if (PyList_Append(self->stack, this) < 0)
2607 goto error;
2608 }
2609 self->index++;
2610
2611 Py_INCREF(node);
2612 Py_SETREF(self->this, node);
2613 Py_INCREF(node);
2614 Py_SETREF(self->last, node);
2615
2616 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2617 goto error;
2618
2619 return node;
2620
2621 error:
2622 Py_DECREF(node);
2623 return NULL;
2624 }
2625
2626 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2627 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2628 {
2629 if (!self->data) {
2630 if (self->last == Py_None) {
2631 /* ignore calls to data before the first call to start */
2632 Py_RETURN_NONE;
2633 }
2634 /* store the first item as is */
2635 Py_INCREF(data); self->data = data;
2636 } else {
2637 /* more than one item; use a list to collect items */
2638 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2639 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2640 /* XXX this code path unused in Python 3? */
2641 /* expat often generates single character data sections; handle
2642 the most common case by resizing the existing string... */
2643 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2644 if (_PyBytes_Resize(&self->data, size + 1) < 0)
2645 return NULL;
2646 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2647 } else if (PyList_CheckExact(self->data)) {
2648 if (PyList_Append(self->data, data) < 0)
2649 return NULL;
2650 } else {
2651 PyObject* list = PyList_New(2);
2652 if (!list)
2653 return NULL;
2654 PyList_SET_ITEM(list, 0, self->data);
2655 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2656 self->data = list;
2657 }
2658 }
2659
2660 Py_RETURN_NONE;
2661 }
2662
2663 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2664 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2665 {
2666 PyObject* item;
2667
2668 if (treebuilder_flush_data(self) < 0) {
2669 return NULL;
2670 }
2671
2672 if (self->index == 0) {
2673 PyErr_SetString(
2674 PyExc_IndexError,
2675 "pop from empty stack"
2676 );
2677 return NULL;
2678 }
2679
2680 item = self->last;
2681 self->last = self->this;
2682 self->index--;
2683 self->this = PyList_GET_ITEM(self->stack, self->index);
2684 Py_INCREF(self->this);
2685 Py_DECREF(item);
2686
2687 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2688 return NULL;
2689
2690 Py_INCREF(self->last);
2691 return (PyObject*) self->last;
2692 }
2693
2694 /* -------------------------------------------------------------------- */
2695 /* methods (in alphabetical order) */
2696
2697 /*[clinic input]
2698 _elementtree.TreeBuilder.data
2699
2700 data: object
2701 /
2702
2703 [clinic start generated code]*/
2704
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* TreeBuilder.data(data): thin wrapper that forwards to the shared
       C-level handler and returns its result unchanged. */
    return treebuilder_handle_data(self, data);
}
2711
2712 /*[clinic input]
2713 _elementtree.TreeBuilder.end
2714
2715 tag: object
2716 /
2717
2718 [clinic start generated code]*/
2719
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* TreeBuilder.end(tag): thin wrapper that forwards to the shared
       C-level handler and returns its result unchanged. */
    return treebuilder_handle_end(self, tag);
}
2726
2727 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)2728 treebuilder_done(TreeBuilderObject* self)
2729 {
2730 PyObject* res;
2731
2732 /* FIXME: check stack size? */
2733
2734 if (self->root)
2735 res = self->root;
2736 else
2737 res = Py_None;
2738
2739 Py_INCREF(res);
2740 return res;
2741 }
2742
2743 /*[clinic input]
2744 _elementtree.TreeBuilder.close
2745
2746 [clinic start generated code]*/
2747
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* TreeBuilder.close(): returns whatever treebuilder_done() returns
       (the root element, or None if nothing was built). */
    return treebuilder_done(self);
}
2754
2755 /*[clinic input]
2756 _elementtree.TreeBuilder.start
2757
2758 tag: object
2759 attrs: object = None
2760 /
2761
2762 [clinic start generated code]*/
2763
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
{
    /* TreeBuilder.start(tag, attrs=None): thin wrapper that forwards to the
       shared C-level handler and returns its result unchanged. */
    return treebuilder_handle_start(self, tag, attrs);
}
2771
2772 /* ==================================================================== */
2773 /* the expat interface */
2774
2775 #include "expat.h"
2776 #include "pyexpat.h"
2777
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call (or test for) an expat entry point through the cached table, e.g.
   EXPAT(ParserFree)(parser). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite passed to ParserCreate_MM so that expat allocates its
   memory through PyObject_Malloc/Realloc/Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2786
/* Instance layout of the C XMLParser.  Every member starts out NULL (see
   xmlparser_new()) and is filled in by XMLParser.__init__. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;      /* expat parser handle; freed in
                               xmlparser_gc_clear() */

    PyObject *target;       /* object receiving the parse events */
    PyObject *entity;       /* dict consulted by expat_default_handler() to
                               resolve entity names */

    PyObject *names;        /* dict used by makeuniversal() to cache raw
                               name bytes -> decoded universal name */

    /* Attributes looked up on `target` at init time; each is NULL when the
       target does not provide it. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
2808
/* Forward declarations: expat_start_doctype_handler() below refers to the
   XMLParser.doctype method and its implementation before they are
   defined. */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
2814
2815 /* helpers */
2816
2817 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)2818 makeuniversal(XMLParserObject* self, const char* string)
2819 {
2820 /* convert a UTF-8 tag/attribute name from the expat parser
2821 to a universal name string */
2822
2823 Py_ssize_t size = (Py_ssize_t) strlen(string);
2824 PyObject* key;
2825 PyObject* value;
2826
2827 /* look the 'raw' name up in the names dictionary */
2828 key = PyBytes_FromStringAndSize(string, size);
2829 if (!key)
2830 return NULL;
2831
2832 value = PyDict_GetItem(self->names, key);
2833
2834 if (value) {
2835 Py_INCREF(value);
2836 } else {
2837 /* new name. convert to universal name, and decode as
2838 necessary */
2839
2840 PyObject* tag;
2841 char* p;
2842 Py_ssize_t i;
2843
2844 /* look for namespace separator */
2845 for (i = 0; i < size; i++)
2846 if (string[i] == '}')
2847 break;
2848 if (i != size) {
2849 /* convert to universal name */
2850 tag = PyBytes_FromStringAndSize(NULL, size+1);
2851 if (tag == NULL) {
2852 Py_DECREF(key);
2853 return NULL;
2854 }
2855 p = PyBytes_AS_STRING(tag);
2856 p[0] = '{';
2857 memcpy(p+1, string, size);
2858 size++;
2859 } else {
2860 /* plain name; use key as tag */
2861 Py_INCREF(key);
2862 tag = key;
2863 }
2864
2865 /* decode universal name */
2866 p = PyBytes_AS_STRING(tag);
2867 value = PyUnicode_DecodeUTF8(p, size, "strict");
2868 Py_DECREF(tag);
2869 if (!value) {
2870 Py_DECREF(key);
2871 return NULL;
2872 }
2873
2874 /* add to names dictionary */
2875 if (PyDict_SetItem(self->names, key, value) < 0) {
2876 Py_DECREF(key);
2877 Py_DECREF(value);
2878 return NULL;
2879 }
2880 }
2881
2882 Py_DECREF(key);
2883 return value;
2884 }
2885
2886 /* Set the ParseError exception with the given parameters.
2887 * If message is not NULL, it's used as the error string. Otherwise, the
2888 * message string is the default for the given error_code.
2889 */
2890 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)2891 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2892 const char *message)
2893 {
2894 PyObject *errmsg, *error, *position, *code;
2895 elementtreestate *st = ET_STATE_GLOBAL;
2896
2897 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
2898 message ? message : EXPAT(ErrorString)(error_code),
2899 line, column);
2900 if (errmsg == NULL)
2901 return;
2902
2903 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
2904 Py_DECREF(errmsg);
2905 if (!error)
2906 return;
2907
2908 /* Add code and position attributes */
2909 code = PyLong_FromLong((long)error_code);
2910 if (!code) {
2911 Py_DECREF(error);
2912 return;
2913 }
2914 if (PyObject_SetAttrString(error, "code", code) == -1) {
2915 Py_DECREF(error);
2916 Py_DECREF(code);
2917 return;
2918 }
2919 Py_DECREF(code);
2920
2921 position = Py_BuildValue("(nn)", line, column);
2922 if (!position) {
2923 Py_DECREF(error);
2924 return;
2925 }
2926 if (PyObject_SetAttrString(error, "position", position) == -1) {
2927 Py_DECREF(error);
2928 Py_DECREF(position);
2929 return;
2930 }
2931 Py_DECREF(position);
2932
2933 PyErr_SetObject(st->parseerror_obj, error);
2934 Py_DECREF(error);
2935 }
2936
2937 /* -------------------------------------------------------------------- */
2938 /* handlers */
2939
2940 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)2941 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2942 int data_len)
2943 {
2944 PyObject* key;
2945 PyObject* value;
2946 PyObject* res;
2947
2948 if (data_len < 2 || data_in[0] != '&')
2949 return;
2950
2951 if (PyErr_Occurred())
2952 return;
2953
2954 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
2955 if (!key)
2956 return;
2957
2958 value = PyDict_GetItem(self->entity, key);
2959
2960 if (value) {
2961 if (TreeBuilder_CheckExact(self->target))
2962 res = treebuilder_handle_data(
2963 (TreeBuilderObject*) self->target, value
2964 );
2965 else if (self->handle_data)
2966 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
2967 else
2968 res = NULL;
2969 Py_XDECREF(res);
2970 } else if (!PyErr_Occurred()) {
2971 /* Report the first error, not the last */
2972 char message[128] = "undefined entity ";
2973 strncat(message, data_in, data_len < 100?data_len:100);
2974 expat_set_error(
2975 XML_ERROR_UNDEFINED_ENTITY,
2976 EXPAT(GetErrorLineNumber)(self->parser),
2977 EXPAT(GetErrorColumnNumber)(self->parser),
2978 message
2979 );
2980 }
2981
2982 Py_DECREF(key);
2983 }
2984
2985 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)2986 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2987 const XML_Char **attrib_in)
2988 {
2989 PyObject* res;
2990 PyObject* tag;
2991 PyObject* attrib;
2992 int ok;
2993
2994 if (PyErr_Occurred())
2995 return;
2996
2997 /* tag name */
2998 tag = makeuniversal(self, tag_in);
2999 if (!tag)
3000 return; /* parser will look for errors */
3001
3002 /* attributes */
3003 if (attrib_in[0]) {
3004 attrib = PyDict_New();
3005 if (!attrib) {
3006 Py_DECREF(tag);
3007 return;
3008 }
3009 while (attrib_in[0] && attrib_in[1]) {
3010 PyObject* key = makeuniversal(self, attrib_in[0]);
3011 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3012 if (!key || !value) {
3013 Py_XDECREF(value);
3014 Py_XDECREF(key);
3015 Py_DECREF(attrib);
3016 Py_DECREF(tag);
3017 return;
3018 }
3019 ok = PyDict_SetItem(attrib, key, value);
3020 Py_DECREF(value);
3021 Py_DECREF(key);
3022 if (ok < 0) {
3023 Py_DECREF(attrib);
3024 Py_DECREF(tag);
3025 return;
3026 }
3027 attrib_in += 2;
3028 }
3029 } else {
3030 Py_INCREF(Py_None);
3031 attrib = Py_None;
3032 }
3033
3034 if (TreeBuilder_CheckExact(self->target)) {
3035 /* shortcut */
3036 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3037 tag, attrib);
3038 }
3039 else if (self->handle_start) {
3040 if (attrib == Py_None) {
3041 Py_DECREF(attrib);
3042 attrib = PyDict_New();
3043 if (!attrib) {
3044 Py_DECREF(tag);
3045 return;
3046 }
3047 }
3048 res = PyObject_CallFunctionObjArgs(self->handle_start,
3049 tag, attrib, NULL);
3050 } else
3051 res = NULL;
3052
3053 Py_DECREF(tag);
3054 Py_DECREF(attrib);
3055
3056 Py_XDECREF(res);
3057 }
3058
3059 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3060 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3061 int data_len)
3062 {
3063 PyObject* data;
3064 PyObject* res;
3065
3066 if (PyErr_Occurred())
3067 return;
3068
3069 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3070 if (!data)
3071 return; /* parser will look for errors */
3072
3073 if (TreeBuilder_CheckExact(self->target))
3074 /* shortcut */
3075 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3076 else if (self->handle_data)
3077 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
3078 else
3079 res = NULL;
3080
3081 Py_DECREF(data);
3082
3083 Py_XDECREF(res);
3084 }
3085
3086 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3087 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3088 {
3089 PyObject* tag;
3090 PyObject* res = NULL;
3091
3092 if (PyErr_Occurred())
3093 return;
3094
3095 if (TreeBuilder_CheckExact(self->target))
3096 /* shortcut */
3097 /* the standard tree builder doesn't look at the end tag */
3098 res = treebuilder_handle_end(
3099 (TreeBuilderObject*) self->target, Py_None
3100 );
3101 else if (self->handle_end) {
3102 tag = makeuniversal(self, tag_in);
3103 if (tag) {
3104 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
3105 Py_DECREF(tag);
3106 }
3107 }
3108
3109 Py_XDECREF(res);
3110 }
3111
3112 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix,const XML_Char * uri)3113 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3114 const XML_Char *uri)
3115 {
3116 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3117 PyObject *parcel;
3118
3119 if (PyErr_Occurred())
3120 return;
3121
3122 if (!target->events_append || !target->start_ns_event_obj)
3123 return;
3124
3125 if (!uri)
3126 uri = "";
3127 if (!prefix)
3128 prefix = "";
3129
3130 parcel = Py_BuildValue("ss", prefix, uri);
3131 if (!parcel)
3132 return;
3133 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3134 Py_DECREF(parcel);
3135 }
3136
3137 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3138 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3139 {
3140 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3141
3142 if (PyErr_Occurred())
3143 return;
3144
3145 if (!target->events_append)
3146 return;
3147
3148 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
3149 }
3150
3151 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3152 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3153 {
3154 PyObject* comment;
3155 PyObject* res;
3156
3157 if (PyErr_Occurred())
3158 return;
3159
3160 if (self->handle_comment) {
3161 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3162 if (comment) {
3163 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3164 comment, NULL);
3165 Py_XDECREF(res);
3166 Py_DECREF(comment);
3167 }
3168 }
3169 }
3170
/* expat doctype-declaration start callback.
 *
 * Converts the doctype name, system id and public id to Python objects
 * (missing ids become None) and reports them as (name, pubid, system):
 * to the target's `doctype` method when it has one, otherwise to a
 * `doctype` method on the parser object itself, but only if that method is
 * not the built-in XMLParser.doctype.  `has_internal_subset` is not used.
 *
 * Does nothing while an exception is pending; on failure the exception is
 * left set.
 */
static void
expat_start_doctype_handler(XMLParserObject *self,
                            const XML_Char *doctype_name,
                            const XML_Char *sysid,
                            const XML_Char *pubid,
                            int has_internal_subset)
{
    PyObject *self_pyobj = (PyObject *)self;
    PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
    PyObject *parser_doctype = NULL;
    PyObject *res = NULL;

    if (PyErr_Occurred())
        return;

    doctype_name_obj = makeuniversal(self, doctype_name);
    if (!doctype_name_obj)
        return;

    /* system id: str, or None when absent */
    if (sysid) {
        sysid_obj = makeuniversal(self, sysid);
        if (!sysid_obj) {
            Py_DECREF(doctype_name_obj);
            return;
        }
    } else {
        Py_INCREF(Py_None);
        sysid_obj = Py_None;
    }

    /* public id: str, or None when absent */
    if (pubid) {
        pubid_obj = makeuniversal(self, pubid);
        if (!pubid_obj) {
            Py_DECREF(doctype_name_obj);
            Py_DECREF(sysid_obj);
            return;
        }
    } else {
        Py_INCREF(Py_None);
        pubid_obj = Py_None;
    }

    /* If the target has a handler for doctype, call it. */
    if (self->handle_doctype) {
        res = PyObject_CallFunctionObjArgs(self->handle_doctype,
                                           doctype_name_obj, pubid_obj,
                                           sysid_obj, NULL);
        Py_CLEAR(res);
    }
    else {
        /* Now see if the parser itself has a doctype method. If yes and it's
         * a custom method, call it but warn about deprecation. If it's only
         * the vanilla XMLParser method, do nothing.
         */
        parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
        /* "vanilla" means: a builtin method bound to this very object whose
           C function is _elementtree_XMLParser_doctype */
        if (parser_doctype &&
            !(PyCFunction_Check(parser_doctype) &&
              PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
              PyCFunction_GET_FUNCTION(parser_doctype) ==
                    (PyCFunction) _elementtree_XMLParser_doctype)) {
            /* NOTE(review): the built-in implementation is called first,
               presumably to emit the deprecation warning mentioned above;
               its body is not visible here - confirm. */
            res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
                                                      pubid_obj, sysid_obj);
            if (!res)
                goto clear;
            Py_DECREF(res);
            res = PyObject_CallFunctionObjArgs(parser_doctype,
                                               doctype_name_obj, pubid_obj,
                                               sysid_obj, NULL);
            Py_CLEAR(res);
        }
    }

clear:
    /* parser_doctype is NULL when the target handled the doctype or the
       attribute lookup failed */
    Py_XDECREF(parser_doctype);
    Py_DECREF(doctype_name_obj);
    Py_DECREF(pubid_obj);
    Py_DECREF(sysid_obj);
}
3249
3250 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3251 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3252 const XML_Char* data_in)
3253 {
3254 PyObject* target;
3255 PyObject* data;
3256 PyObject* res;
3257
3258 if (PyErr_Occurred())
3259 return;
3260
3261 if (self->handle_pi) {
3262 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3263 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3264 if (target && data) {
3265 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3266 target, data, NULL);
3267 Py_XDECREF(res);
3268 Py_DECREF(data);
3269 Py_DECREF(target);
3270 } else {
3271 Py_XDECREF(data);
3272 Py_XDECREF(target);
3273 }
3274 }
3275 }
3276
3277 /* -------------------------------------------------------------------- */
3278
3279 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3280 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3281 {
3282 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3283 if (self) {
3284 self->parser = NULL;
3285 self->target = self->entity = self->names = NULL;
3286 self->handle_start = self->handle_data = self->handle_end = NULL;
3287 self->handle_comment = self->handle_pi = self->handle_close = NULL;
3288 self->handle_doctype = NULL;
3289 }
3290 return (PyObject *)self;
3291 }
3292
3293 static int
ignore_attribute_error(PyObject * value)3294 ignore_attribute_error(PyObject *value)
3295 {
3296 if (value == NULL) {
3297 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3298 return -1;
3299 }
3300 PyErr_Clear();
3301 }
3302 return 0;
3303 }
3304
3305 /*[clinic input]
3306 _elementtree.XMLParser.__init__
3307
3308 html: object = NULL
3309 target: object = NULL
3310 encoding: str(accept={str, NoneType}) = NULL
3311
3312 [clinic start generated code]*/
3313
3314 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * html,PyObject * target,const char * encoding)3315 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3316 PyObject *target, const char *encoding)
3317 /*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
3318 {
3319 if (html != NULL) {
3320 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3321 "The html argument of XMLParser() is deprecated",
3322 1) < 0) {
3323 return -1;
3324 }
3325 }
3326
3327 self->entity = PyDict_New();
3328 if (!self->entity)
3329 return -1;
3330
3331 self->names = PyDict_New();
3332 if (!self->names) {
3333 Py_CLEAR(self->entity);
3334 return -1;
3335 }
3336
3337 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3338 if (!self->parser) {
3339 Py_CLEAR(self->entity);
3340 Py_CLEAR(self->names);
3341 PyErr_NoMemory();
3342 return -1;
3343 }
3344 /* expat < 2.1.0 has no XML_SetHashSalt() */
3345 if (EXPAT(SetHashSalt) != NULL) {
3346 EXPAT(SetHashSalt)(self->parser,
3347 (unsigned long)_Py_HashSecret.expat.hashsalt);
3348 }
3349
3350 if (target) {
3351 Py_INCREF(target);
3352 } else {
3353 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3354 if (!target) {
3355 Py_CLEAR(self->entity);
3356 Py_CLEAR(self->names);
3357 return -1;
3358 }
3359 }
3360 self->target = target;
3361
3362 self->handle_start = PyObject_GetAttrString(target, "start");
3363 if (ignore_attribute_error(self->handle_start)) {
3364 return -1;
3365 }
3366 self->handle_data = PyObject_GetAttrString(target, "data");
3367 if (ignore_attribute_error(self->handle_data)) {
3368 return -1;
3369 }
3370 self->handle_end = PyObject_GetAttrString(target, "end");
3371 if (ignore_attribute_error(self->handle_end)) {
3372 return -1;
3373 }
3374 self->handle_comment = PyObject_GetAttrString(target, "comment");
3375 if (ignore_attribute_error(self->handle_comment)) {
3376 return -1;
3377 }
3378 self->handle_pi = PyObject_GetAttrString(target, "pi");
3379 if (ignore_attribute_error(self->handle_pi)) {
3380 return -1;
3381 }
3382 self->handle_close = PyObject_GetAttrString(target, "close");
3383 if (ignore_attribute_error(self->handle_close)) {
3384 return -1;
3385 }
3386 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3387 if (ignore_attribute_error(self->handle_doctype)) {
3388 return -1;
3389 }
3390
3391 /* configure parser */
3392 EXPAT(SetUserData)(self->parser, self);
3393 EXPAT(SetElementHandler)(
3394 self->parser,
3395 (XML_StartElementHandler) expat_start_handler,
3396 (XML_EndElementHandler) expat_end_handler
3397 );
3398 EXPAT(SetDefaultHandlerExpand)(
3399 self->parser,
3400 (XML_DefaultHandler) expat_default_handler
3401 );
3402 EXPAT(SetCharacterDataHandler)(
3403 self->parser,
3404 (XML_CharacterDataHandler) expat_data_handler
3405 );
3406 if (self->handle_comment)
3407 EXPAT(SetCommentHandler)(
3408 self->parser,
3409 (XML_CommentHandler) expat_comment_handler
3410 );
3411 if (self->handle_pi)
3412 EXPAT(SetProcessingInstructionHandler)(
3413 self->parser,
3414 (XML_ProcessingInstructionHandler) expat_pi_handler
3415 );
3416 EXPAT(SetStartDoctypeDeclHandler)(
3417 self->parser,
3418 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3419 );
3420 EXPAT(SetUnknownEncodingHandler)(
3421 self->parser,
3422 EXPAT(DefaultUnknownEncodingHandler), NULL
3423 );
3424
3425 return 0;
3426 }
3427
3428 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3429 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3430 {
3431 Py_VISIT(self->handle_close);
3432 Py_VISIT(self->handle_pi);
3433 Py_VISIT(self->handle_comment);
3434 Py_VISIT(self->handle_end);
3435 Py_VISIT(self->handle_data);
3436 Py_VISIT(self->handle_start);
3437
3438 Py_VISIT(self->target);
3439 Py_VISIT(self->entity);
3440 Py_VISIT(self->names);
3441
3442 return 0;
3443 }
3444
3445 static int
xmlparser_gc_clear(XMLParserObject * self)3446 xmlparser_gc_clear(XMLParserObject *self)
3447 {
3448 if (self->parser != NULL) {
3449 XML_Parser parser = self->parser;
3450 self->parser = NULL;
3451 EXPAT(ParserFree)(parser);
3452 }
3453
3454 Py_CLEAR(self->handle_close);
3455 Py_CLEAR(self->handle_pi);
3456 Py_CLEAR(self->handle_comment);
3457 Py_CLEAR(self->handle_end);
3458 Py_CLEAR(self->handle_data);
3459 Py_CLEAR(self->handle_start);
3460 Py_CLEAR(self->handle_doctype);
3461
3462 Py_CLEAR(self->target);
3463 Py_CLEAR(self->entity);
3464 Py_CLEAR(self->names);
3465
3466 return 0;
3467 }
3468
3469 static void
xmlparser_dealloc(XMLParserObject * self)3470 xmlparser_dealloc(XMLParserObject* self)
3471 {
3472 PyObject_GC_UnTrack(self);
3473 xmlparser_gc_clear(self);
3474 Py_TYPE(self)->tp_free((PyObject *)self);
3475 }
3476
3477 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3478 _check_xmlparser(XMLParserObject* self)
3479 {
3480 if (self->target == NULL) {
3481 PyErr_SetString(PyExc_ValueError,
3482 "XMLParser.__init__() wasn't called");
3483 return 0;
3484 }
3485 return 1;
3486 }
3487
3488 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3489 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3490 {
3491 int ok;
3492
3493 assert(!PyErr_Occurred());
3494 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3495
3496 if (PyErr_Occurred())
3497 return NULL;
3498
3499 if (!ok) {
3500 expat_set_error(
3501 EXPAT(GetErrorCode)(self->parser),
3502 EXPAT(GetErrorLineNumber)(self->parser),
3503 EXPAT(GetErrorColumnNumber)(self->parser),
3504 NULL
3505 );
3506 return NULL;
3507 }
3508
3509 Py_RETURN_NONE;
3510 }
3511
3512 /*[clinic input]
3513 _elementtree.XMLParser.close
3514
3515 [clinic start generated code]*/
3516
3517 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3518 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3519 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3520 {
3521 /* end feeding data to parser */
3522
3523 PyObject* res;
3524
3525 if (!_check_xmlparser(self)) {
3526 return NULL;
3527 }
3528 res = expat_parse(self, "", 0, 1);
3529 if (!res)
3530 return NULL;
3531
3532 if (TreeBuilder_CheckExact(self->target)) {
3533 Py_DECREF(res);
3534 return treebuilder_done((TreeBuilderObject*) self->target);
3535 }
3536 else if (self->handle_close) {
3537 Py_DECREF(res);
3538 return _PyObject_CallNoArg(self->handle_close);
3539 }
3540 else {
3541 return res;
3542 }
3543 }
3544
3545 /*[clinic input]
3546 _elementtree.XMLParser.feed
3547
3548 data: object
3549 /
3550
3551 [clinic start generated code]*/
3552
3553 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3554 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3555 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3556 {
3557 /* feed data to parser */
3558
3559 if (!_check_xmlparser(self)) {
3560 return NULL;
3561 }
3562 if (PyUnicode_Check(data)) {
3563 Py_ssize_t data_len;
3564 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3565 if (data_ptr == NULL)
3566 return NULL;
3567 if (data_len > INT_MAX) {
3568 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3569 return NULL;
3570 }
3571 /* Explicitly set UTF-8 encoding. Return code ignored. */
3572 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3573 return expat_parse(self, data_ptr, (int)data_len, 0);
3574 }
3575 else {
3576 Py_buffer view;
3577 PyObject *res;
3578 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3579 return NULL;
3580 if (view.len > INT_MAX) {
3581 PyBuffer_Release(&view);
3582 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3583 return NULL;
3584 }
3585 res = expat_parse(self, view.buf, (int)view.len, 0);
3586 PyBuffer_Release(&view);
3587 return res;
3588 }
3589 }
3590
3591 /*[clinic input]
3592 _elementtree.XMLParser._parse_whole
3593
3594 file: object
3595 /
3596
3597 [clinic start generated code]*/
3598
3599 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3600 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3601 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3602 {
3603 /* (internal) parse the whole input, until end of stream */
3604 PyObject* reader;
3605 PyObject* buffer;
3606 PyObject* temp;
3607 PyObject* res;
3608
3609 if (!_check_xmlparser(self)) {
3610 return NULL;
3611 }
3612 reader = PyObject_GetAttrString(file, "read");
3613 if (!reader)
3614 return NULL;
3615
3616 /* read from open file object */
3617 for (;;) {
3618
3619 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3620
3621 if (!buffer) {
3622 /* read failed (e.g. due to KeyboardInterrupt) */
3623 Py_DECREF(reader);
3624 return NULL;
3625 }
3626
3627 if (PyUnicode_CheckExact(buffer)) {
3628 /* A unicode object is encoded into bytes using UTF-8 */
3629 if (PyUnicode_GET_LENGTH(buffer) == 0) {
3630 Py_DECREF(buffer);
3631 break;
3632 }
3633 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3634 Py_DECREF(buffer);
3635 if (!temp) {
3636 /* Propagate exception from PyUnicode_AsEncodedString */
3637 Py_DECREF(reader);
3638 return NULL;
3639 }
3640 buffer = temp;
3641 }
3642 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3643 Py_DECREF(buffer);
3644 break;
3645 }
3646
3647 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3648 Py_DECREF(buffer);
3649 Py_DECREF(reader);
3650 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3651 return NULL;
3652 }
3653 res = expat_parse(
3654 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3655 );
3656
3657 Py_DECREF(buffer);
3658
3659 if (!res) {
3660 Py_DECREF(reader);
3661 return NULL;
3662 }
3663 Py_DECREF(res);
3664
3665 }
3666
3667 Py_DECREF(reader);
3668
3669 res = expat_parse(self, "", 0, 1);
3670
3671 if (res && TreeBuilder_CheckExact(self->target)) {
3672 Py_DECREF(res);
3673 return treebuilder_done((TreeBuilderObject*) self->target);
3674 }
3675
3676 return res;
3677 }
3678
3679 /*[clinic input]
3680 _elementtree.XMLParser.doctype
3681
3682 name: object
3683 pubid: object
3684 system: object
3685 /
3686
3687 [clinic start generated code]*/
3688
3689 static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject * self,PyObject * name,PyObject * pubid,PyObject * system)3690 _elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3691 PyObject *pubid, PyObject *system)
3692 /*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
3693 {
3694 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3695 "This method of XMLParser is deprecated. Define"
3696 " doctype() method on the TreeBuilder target.",
3697 1) < 0) {
3698 return NULL;
3699 }
3700 Py_RETURN_NONE;
3701 }
3702
3703 /*[clinic input]
3704 _elementtree.XMLParser._setevents
3705
3706 events_queue: object
3707 events_to_report: object = None
3708 /
3709
3710 [clinic start generated code]*/
3711
3712 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)3713 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3714 PyObject *events_queue,
3715 PyObject *events_to_report)
3716 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
3717 {
3718 /* activate element event reporting */
3719 Py_ssize_t i;
3720 TreeBuilderObject *target;
3721 PyObject *events_append, *events_seq;
3722
3723 if (!_check_xmlparser(self)) {
3724 return NULL;
3725 }
3726 if (!TreeBuilder_CheckExact(self->target)) {
3727 PyErr_SetString(
3728 PyExc_TypeError,
3729 "event handling only supported for ElementTree.TreeBuilder "
3730 "targets"
3731 );
3732 return NULL;
3733 }
3734
3735 target = (TreeBuilderObject*) self->target;
3736
3737 events_append = PyObject_GetAttrString(events_queue, "append");
3738 if (events_append == NULL)
3739 return NULL;
3740 Py_XSETREF(target->events_append, events_append);
3741
3742 /* clear out existing events */
3743 Py_CLEAR(target->start_event_obj);
3744 Py_CLEAR(target->end_event_obj);
3745 Py_CLEAR(target->start_ns_event_obj);
3746 Py_CLEAR(target->end_ns_event_obj);
3747
3748 if (events_to_report == Py_None) {
3749 /* default is "end" only */
3750 target->end_event_obj = PyUnicode_FromString("end");
3751 Py_RETURN_NONE;
3752 }
3753
3754 if (!(events_seq = PySequence_Fast(events_to_report,
3755 "events must be a sequence"))) {
3756 return NULL;
3757 }
3758
3759 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
3760 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3761 const char *event_name = NULL;
3762 if (PyUnicode_Check(event_name_obj)) {
3763 event_name = PyUnicode_AsUTF8(event_name_obj);
3764 } else if (PyBytes_Check(event_name_obj)) {
3765 event_name = PyBytes_AS_STRING(event_name_obj);
3766 }
3767 if (event_name == NULL) {
3768 Py_DECREF(events_seq);
3769 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3770 return NULL;
3771 }
3772
3773 Py_INCREF(event_name_obj);
3774 if (strcmp(event_name, "start") == 0) {
3775 Py_XSETREF(target->start_event_obj, event_name_obj);
3776 } else if (strcmp(event_name, "end") == 0) {
3777 Py_XSETREF(target->end_event_obj, event_name_obj);
3778 } else if (strcmp(event_name, "start-ns") == 0) {
3779 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
3780 EXPAT(SetNamespaceDeclHandler)(
3781 self->parser,
3782 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3783 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3784 );
3785 } else if (strcmp(event_name, "end-ns") == 0) {
3786 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
3787 EXPAT(SetNamespaceDeclHandler)(
3788 self->parser,
3789 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3790 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3791 );
3792 } else {
3793 Py_DECREF(event_name_obj);
3794 Py_DECREF(events_seq);
3795 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
3796 return NULL;
3797 }
3798 }
3799
3800 Py_DECREF(events_seq);
3801 Py_RETURN_NONE;
3802 }
3803
3804 static PyObject*
xmlparser_getattro(XMLParserObject * self,PyObject * nameobj)3805 xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
3806 {
3807 if (PyUnicode_Check(nameobj)) {
3808 PyObject* res;
3809 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
3810 res = self->entity;
3811 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
3812 res = self->target;
3813 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
3814 return PyUnicode_FromFormat(
3815 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3816 XML_MINOR_VERSION, XML_MICRO_VERSION);
3817 }
3818 else
3819 goto generic;
3820
3821 if (!res && !_check_xmlparser(self)) {
3822 return NULL;
3823 }
3824 Py_INCREF(res);
3825 return res;
3826 }
3827 generic:
3828 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
3829 }
3830
3831 #include "clinic/_elementtree.c.h"
3832
3833 static PyMethodDef element_methods[] = {
3834
3835 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3836
3837 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3838 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3839
3840 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3841 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3842 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3843
3844 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3845 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3846 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3847 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3848
3849 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3850 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3851 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3852
3853 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
3854 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3855
3856 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3857 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3858
3859 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3860
3861 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3862 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3863 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3864 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3865 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3866
3867 {NULL, NULL}
3868 };
3869
3870 static PyMappingMethods element_as_mapping = {
3871 (lenfunc) element_length,
3872 (binaryfunc) element_subscr,
3873 (objobjargproc) element_ass_subscr,
3874 };
3875
3876 static PyGetSetDef element_getsetlist[] = {
3877 {"tag",
3878 (getter)element_tag_getter,
3879 (setter)element_tag_setter,
3880 "A string identifying what kind of data this element represents"},
3881 {"text",
3882 (getter)element_text_getter,
3883 (setter)element_text_setter,
3884 "A string of text directly after the start tag, or None"},
3885 {"tail",
3886 (getter)element_tail_getter,
3887 (setter)element_tail_setter,
3888 "A string of text directly after the end tag, or None"},
3889 {"attrib",
3890 (getter)element_attrib_getter,
3891 (setter)element_attrib_setter,
3892 "A dictionary containing the element's attributes"},
3893 {NULL},
3894 };
3895
3896 static PyTypeObject Element_Type = {
3897 PyVarObject_HEAD_INIT(NULL, 0)
3898 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3899 /* methods */
3900 (destructor)element_dealloc, /* tp_dealloc */
3901 0, /* tp_print */
3902 0, /* tp_getattr */
3903 0, /* tp_setattr */
3904 0, /* tp_reserved */
3905 (reprfunc)element_repr, /* tp_repr */
3906 0, /* tp_as_number */
3907 &element_as_sequence, /* tp_as_sequence */
3908 &element_as_mapping, /* tp_as_mapping */
3909 0, /* tp_hash */
3910 0, /* tp_call */
3911 0, /* tp_str */
3912 PyObject_GenericGetAttr, /* tp_getattro */
3913 0, /* tp_setattro */
3914 0, /* tp_as_buffer */
3915 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3916 /* tp_flags */
3917 0, /* tp_doc */
3918 (traverseproc)element_gc_traverse, /* tp_traverse */
3919 (inquiry)element_gc_clear, /* tp_clear */
3920 0, /* tp_richcompare */
3921 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3922 0, /* tp_iter */
3923 0, /* tp_iternext */
3924 element_methods, /* tp_methods */
3925 0, /* tp_members */
3926 element_getsetlist, /* tp_getset */
3927 0, /* tp_base */
3928 0, /* tp_dict */
3929 0, /* tp_descr_get */
3930 0, /* tp_descr_set */
3931 0, /* tp_dictoffset */
3932 (initproc)element_init, /* tp_init */
3933 PyType_GenericAlloc, /* tp_alloc */
3934 element_new, /* tp_new */
3935 0, /* tp_free */
3936 };
3937
3938 static PyMethodDef treebuilder_methods[] = {
3939 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3940 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3941 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3942 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3943 {NULL, NULL}
3944 };
3945
3946 static PyTypeObject TreeBuilder_Type = {
3947 PyVarObject_HEAD_INIT(NULL, 0)
3948 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3949 /* methods */
3950 (destructor)treebuilder_dealloc, /* tp_dealloc */
3951 0, /* tp_print */
3952 0, /* tp_getattr */
3953 0, /* tp_setattr */
3954 0, /* tp_reserved */
3955 0, /* tp_repr */
3956 0, /* tp_as_number */
3957 0, /* tp_as_sequence */
3958 0, /* tp_as_mapping */
3959 0, /* tp_hash */
3960 0, /* tp_call */
3961 0, /* tp_str */
3962 0, /* tp_getattro */
3963 0, /* tp_setattro */
3964 0, /* tp_as_buffer */
3965 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3966 /* tp_flags */
3967 0, /* tp_doc */
3968 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3969 (inquiry)treebuilder_gc_clear, /* tp_clear */
3970 0, /* tp_richcompare */
3971 0, /* tp_weaklistoffset */
3972 0, /* tp_iter */
3973 0, /* tp_iternext */
3974 treebuilder_methods, /* tp_methods */
3975 0, /* tp_members */
3976 0, /* tp_getset */
3977 0, /* tp_base */
3978 0, /* tp_dict */
3979 0, /* tp_descr_get */
3980 0, /* tp_descr_set */
3981 0, /* tp_dictoffset */
3982 _elementtree_TreeBuilder___init__, /* tp_init */
3983 PyType_GenericAlloc, /* tp_alloc */
3984 treebuilder_new, /* tp_new */
3985 0, /* tp_free */
3986 };
3987
3988 static PyMethodDef xmlparser_methods[] = {
3989 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3990 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3991 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3992 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3993 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3994 {NULL, NULL}
3995 };
3996
3997 static PyTypeObject XMLParser_Type = {
3998 PyVarObject_HEAD_INIT(NULL, 0)
3999 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4000 /* methods */
4001 (destructor)xmlparser_dealloc, /* tp_dealloc */
4002 0, /* tp_print */
4003 0, /* tp_getattr */
4004 0, /* tp_setattr */
4005 0, /* tp_reserved */
4006 0, /* tp_repr */
4007 0, /* tp_as_number */
4008 0, /* tp_as_sequence */
4009 0, /* tp_as_mapping */
4010 0, /* tp_hash */
4011 0, /* tp_call */
4012 0, /* tp_str */
4013 (getattrofunc)xmlparser_getattro, /* tp_getattro */
4014 0, /* tp_setattro */
4015 0, /* tp_as_buffer */
4016 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4017 /* tp_flags */
4018 0, /* tp_doc */
4019 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4020 (inquiry)xmlparser_gc_clear, /* tp_clear */
4021 0, /* tp_richcompare */
4022 0, /* tp_weaklistoffset */
4023 0, /* tp_iter */
4024 0, /* tp_iternext */
4025 xmlparser_methods, /* tp_methods */
4026 0, /* tp_members */
4027 0, /* tp_getset */
4028 0, /* tp_base */
4029 0, /* tp_dict */
4030 0, /* tp_descr_get */
4031 0, /* tp_descr_set */
4032 0, /* tp_dictoffset */
4033 _elementtree_XMLParser___init__, /* tp_init */
4034 PyType_GenericAlloc, /* tp_alloc */
4035 xmlparser_new, /* tp_new */
4036 0, /* tp_free */
4037 };
4038
4039 /* ==================================================================== */
4040 /* python module interface */
4041
4042 static PyMethodDef _functions[] = {
4043 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
4044 {NULL, NULL}
4045 };
4046
4047
4048 static struct PyModuleDef elementtreemodule = {
4049 PyModuleDef_HEAD_INIT,
4050 "_elementtree",
4051 NULL,
4052 sizeof(elementtreestate),
4053 _functions,
4054 NULL,
4055 elementtree_traverse,
4056 elementtree_clear,
4057 elementtree_free
4058 };
4059
4060 PyMODINIT_FUNC
PyInit__elementtree(void)4061 PyInit__elementtree(void)
4062 {
4063 PyObject *m, *temp;
4064 elementtreestate *st;
4065
4066 m = PyState_FindModule(&elementtreemodule);
4067 if (m) {
4068 Py_INCREF(m);
4069 return m;
4070 }
4071
4072 /* Initialize object types */
4073 if (PyType_Ready(&ElementIter_Type) < 0)
4074 return NULL;
4075 if (PyType_Ready(&TreeBuilder_Type) < 0)
4076 return NULL;
4077 if (PyType_Ready(&Element_Type) < 0)
4078 return NULL;
4079 if (PyType_Ready(&XMLParser_Type) < 0)
4080 return NULL;
4081
4082 m = PyModule_Create(&elementtreemodule);
4083 if (!m)
4084 return NULL;
4085 st = ET_STATE(m);
4086
4087 if (!(temp = PyImport_ImportModule("copy")))
4088 return NULL;
4089 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4090 Py_XDECREF(temp);
4091
4092 if (st->deepcopy_obj == NULL) {
4093 return NULL;
4094 }
4095
4096 assert(!PyErr_Occurred());
4097 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4098 return NULL;
4099
4100 /* link against pyexpat */
4101 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4102 if (expat_capi) {
4103 /* check that it's usable */
4104 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4105 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4106 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4107 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4108 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4109 PyErr_SetString(PyExc_ImportError,
4110 "pyexpat version is incompatible");
4111 return NULL;
4112 }
4113 } else {
4114 return NULL;
4115 }
4116
4117 st->parseerror_obj = PyErr_NewException(
4118 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4119 );
4120 Py_INCREF(st->parseerror_obj);
4121 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
4122
4123 Py_INCREF((PyObject *)&Element_Type);
4124 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4125
4126 Py_INCREF((PyObject *)&TreeBuilder_Type);
4127 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4128
4129 Py_INCREF((PyObject *)&XMLParser_Type);
4130 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
4131
4132 return m;
4133 }
4134