1 #include "Python.h"
2 #include <ctype.h>
3
4 #include "structmember.h"
5 #include "frameobject.h"
6 #include "expat.h"
7
8 #include "pyexpat.h"
9
10 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
11 included methods. */
12 /*[clinic input]
13 module pyexpat
14 [clinic start generated code]*/
15 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
16
/* Expat's version folded into a single integer (major, minor and micro each
   occupy two decimal digits) so it can be compared in #if conditionals. */
#define XML_COMBINED_VERSION \
    (XML_MAJOR_VERSION * 10000 + XML_MINOR_VERSION * 100 + XML_MICRO_VERSION)
18
/* Allocation suite passed to XML_ParserCreate_MM() so that the memory used
   by the Expat parser comes from the Python object allocator. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
21
/* Index of each supported callback in xmlparseobject.handlers.  The
   numeric values are positional, so entries must not be reordered. */
enum HandlerTypes {
    /* Element and character content. */
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* Declarations. */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,

    /* Comments and CDATA sections. */
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* Default handlers. */
    Default,
    DefaultHandlerExpand,

    /* Standalone and external entities. */
    NotStandalone,
    ExternalEntityRef,

    /* DTD related events. */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif

    /* One past the last real handler. */
    _DummyDecl
};
49
/* Exception class used by set_error(): it is called with the formatted
   message to build the exception instance, which is then raised with
   PyErr_SetObject(). */
static PyObject *ErrorObject;
51
52 /* ----------------------------------------------------- */
53
54 /* Declarations for objects of type xmlparser */
55
/* Instance data of an xmlparser object: the wrapped Expat parser, the
   per-parser options, the optional character data buffer and the Python
   level handlers. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* Wrapped Expat parser; NULL once freed. */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
                                /* (may be NULL: interning is then skipped) */
    PyObject **handlers;        /* Python callables indexed by HandlerTypes; */
                                /* an entry is NULL when no handler is set */
} xmlparseobject;
71
72 #include "clinic/pyexpat.c.h"
73
/* Initial value of xmlparseobject.buffer_size for a newly created parser. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Type object of xmlparser instances. */
static PyTypeObject Xmlparsetype;

/* Function that installs (or, when meth is NULL, removes) one kind of
   callback on an Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to one of the C callback trampolines. */
typedef void* xmlhandler;
80
/* Description of one handler slot.  Entries of handler_info[] are indexed
   like xmlparseobject.handlers, and the table is terminated by an entry
   whose name is NULL. */
struct HandlerInfo {
    const char *name;           /* Handler name; NULL marks the end. */
    xmlhandlersetter setter;    /* Installs `handler` on an Expat parser. */
    xmlhandler handler;         /* C trampoline registered with Expat. */
    PyGetSetDef getset;         /* Attribute descriptor for this handler. */
};

static struct HandlerInfo handler_info[64];
89
90 /* Set an integer attribute on the error object; return true on success,
91 * false on an exception.
92 */
93 static int
set_error_attr(PyObject * err,const char * name,int value)94 set_error_attr(PyObject *err, const char *name, int value)
95 {
96 PyObject *v = PyLong_FromLong(value);
97
98 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
99 Py_XDECREF(v);
100 return 0;
101 }
102 Py_DECREF(v);
103 return 1;
104 }
105
106 /* Build and set an Expat exception, including positioning
107 * information. Always returns NULL.
108 */
109 static PyObject *
set_error(xmlparseobject * self,enum XML_Error code)110 set_error(xmlparseobject *self, enum XML_Error code)
111 {
112 PyObject *err;
113 PyObject *buffer;
114 XML_Parser parser = self->itself;
115 int lineno = XML_GetErrorLineNumber(parser);
116 int column = XML_GetErrorColumnNumber(parser);
117
118 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
119 XML_ErrorString(code), lineno, column);
120 if (buffer == NULL)
121 return NULL;
122 err = PyObject_CallFunctionObjArgs(ErrorObject, buffer, NULL);
123 Py_DECREF(buffer);
124 if ( err != NULL
125 && set_error_attr(err, "code", code)
126 && set_error_attr(err, "offset", column)
127 && set_error_attr(err, "lineno", lineno)) {
128 PyErr_SetObject(ErrorObject, err);
129 }
130 Py_XDECREF(err);
131 return NULL;
132 }
133
134 static int
have_handler(xmlparseobject * self,int type)135 have_handler(xmlparseobject *self, int type)
136 {
137 PyObject *handler = self->handlers[type];
138 return handler != NULL;
139 }
140
141 /* Convert a string of XML_Chars into a Unicode string.
142 Returns None if str is a null pointer. */
143
144 static PyObject *
conv_string_to_unicode(const XML_Char * str)145 conv_string_to_unicode(const XML_Char *str)
146 {
147 /* XXX currently this code assumes that XML_Char is 8-bit,
148 and hence in UTF-8. */
149 /* UTF-8 from Expat, Unicode desired */
150 if (str == NULL) {
151 Py_RETURN_NONE;
152 }
153 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
154 }
155
156 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)157 conv_string_len_to_unicode(const XML_Char *str, int len)
158 {
159 /* XXX currently this code assumes that XML_Char is 8-bit,
160 and hence in UTF-8. */
161 /* UTF-8 from Expat, Unicode desired */
162 if (str == NULL) {
163 Py_RETURN_NONE;
164 }
165 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
166 }
167
168 /* Callback routines */
169
/* Forward declaration; the definition comes later in this file.
   flag_error() calls it with initial == 0, and the parser constructors call
   it with initial == 1 right after allocating the handlers array. */
static void clear_handlers(xmlparseobject *self, int initial);
171
172 /* This handler is used when an error has been detected, in the hope
173 that actual parsing can be terminated early. This will only help
174 if an external entity reference is encountered. */
175 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)176 error_external_entity_ref_handler(XML_Parser parser,
177 const XML_Char *context,
178 const XML_Char *base,
179 const XML_Char *systemId,
180 const XML_Char *publicId)
181 {
182 return 0;
183 }
184
185 /* Dummy character data handler used when an error (exception) has
186 been detected, and the actual parsing can be terminated early.
187 This is needed since character data handler can't be safely removed
188 from within the character data handler, but can be replaced. It is
189 used only from the character data handler trampoline, and must be
190 used right after `flag_error()` is called. */
191 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)192 noop_character_data_handler(void *userData, const XML_Char *data, int len)
193 {
194 /* Do nothing. */
195 }
196
197 static void
flag_error(xmlparseobject * self)198 flag_error(xmlparseobject *self)
199 {
200 clear_handlers(self, 0);
201 XML_SetExternalEntityRefHandler(self->itself,
202 error_external_entity_ref_handler);
203 }
204
205 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)206 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
207 xmlparseobject *self)
208 {
209 PyObject *res;
210
211 res = PyEval_CallObject(func, args);
212 if (res == NULL) {
213 _PyTraceback_Add(funcname, __FILE__, lineno);
214 XML_StopParser(self->itself, XML_FALSE);
215 }
216 return res;
217 }
218
219 static PyObject*
string_intern(xmlparseobject * self,const char * str)220 string_intern(xmlparseobject *self, const char* str)
221 {
222 PyObject *result = conv_string_to_unicode(str);
223 PyObject *value;
224 /* result can be NULL if the unicode conversion failed. */
225 if (!result)
226 return result;
227 if (!self->intern)
228 return result;
229 value = PyDict_GetItemWithError(self->intern, result);
230 if (!value) {
231 if (!PyErr_Occurred() &&
232 PyDict_SetItem(self->intern, result, result) == 0)
233 {
234 return result;
235 }
236 else {
237 Py_DECREF(result);
238 return NULL;
239 }
240 }
241 Py_INCREF(value);
242 Py_DECREF(result);
243 return value;
244 }
245
246 /* Return 0 on success, -1 on exception.
247 * flag_error() will be called before return if needed.
248 */
249 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)250 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
251 {
252 PyObject *args;
253 PyObject *temp;
254
255 if (!have_handler(self, CharacterData))
256 return -1;
257
258 args = PyTuple_New(1);
259 if (args == NULL)
260 return -1;
261 temp = (conv_string_len_to_unicode(buffer, len));
262 if (temp == NULL) {
263 Py_DECREF(args);
264 flag_error(self);
265 XML_SetCharacterDataHandler(self->itself,
266 noop_character_data_handler);
267 return -1;
268 }
269 PyTuple_SET_ITEM(args, 0, temp);
270 /* temp is now a borrowed reference; consider it unused. */
271 self->in_callback = 1;
272 temp = call_with_frame("CharacterData", __LINE__,
273 self->handlers[CharacterData], args, self);
274 /* temp is an owned reference again, or NULL */
275 self->in_callback = 0;
276 Py_DECREF(args);
277 if (temp == NULL) {
278 flag_error(self);
279 XML_SetCharacterDataHandler(self->itself,
280 noop_character_data_handler);
281 return -1;
282 }
283 Py_DECREF(temp);
284 return 0;
285 }
286
287 static int
flush_character_buffer(xmlparseobject * self)288 flush_character_buffer(xmlparseobject *self)
289 {
290 int rc;
291 if (self->buffer == NULL || self->buffer_used == 0)
292 return 0;
293 rc = call_character_handler(self, self->buffer, self->buffer_used);
294 self->buffer_used = 0;
295 return rc;
296 }
297
298 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)299 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
300 {
301 xmlparseobject *self = (xmlparseobject *) userData;
302
303 if (PyErr_Occurred())
304 return;
305
306 if (self->buffer == NULL)
307 call_character_handler(self, data, len);
308 else {
309 if ((self->buffer_used + len) > self->buffer_size) {
310 if (flush_character_buffer(self) < 0)
311 return;
312 /* handler might have changed; drop the rest on the floor
313 * if there isn't a handler anymore
314 */
315 if (!have_handler(self, CharacterData))
316 return;
317 }
318 if (len > self->buffer_size) {
319 call_character_handler(self, data, len);
320 self->buffer_used = 0;
321 }
322 else {
323 memcpy(self->buffer + self->buffer_used,
324 data, len * sizeof(XML_Char));
325 self->buffer_used += len;
326 }
327 }
328 }
329
330 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])331 my_StartElementHandler(void *userData,
332 const XML_Char *name, const XML_Char *atts[])
333 {
334 xmlparseobject *self = (xmlparseobject *)userData;
335
336 if (have_handler(self, StartElement)) {
337 PyObject *container, *rv, *args;
338 int i, max;
339
340 if (PyErr_Occurred())
341 return;
342
343 if (flush_character_buffer(self) < 0)
344 return;
345 /* Set max to the number of slots filled in atts[]; max/2 is
346 * the number of attributes we need to process.
347 */
348 if (self->specified_attributes) {
349 max = XML_GetSpecifiedAttributeCount(self->itself);
350 }
351 else {
352 max = 0;
353 while (atts[max] != NULL)
354 max += 2;
355 }
356 /* Build the container. */
357 if (self->ordered_attributes)
358 container = PyList_New(max);
359 else
360 container = PyDict_New();
361 if (container == NULL) {
362 flag_error(self);
363 return;
364 }
365 for (i = 0; i < max; i += 2) {
366 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
367 PyObject *v;
368 if (n == NULL) {
369 flag_error(self);
370 Py_DECREF(container);
371 return;
372 }
373 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
374 if (v == NULL) {
375 flag_error(self);
376 Py_DECREF(container);
377 Py_DECREF(n);
378 return;
379 }
380 if (self->ordered_attributes) {
381 PyList_SET_ITEM(container, i, n);
382 PyList_SET_ITEM(container, i+1, v);
383 }
384 else if (PyDict_SetItem(container, n, v)) {
385 flag_error(self);
386 Py_DECREF(n);
387 Py_DECREF(v);
388 Py_DECREF(container);
389 return;
390 }
391 else {
392 Py_DECREF(n);
393 Py_DECREF(v);
394 }
395 }
396 args = string_intern(self, name);
397 if (args == NULL) {
398 Py_DECREF(container);
399 return;
400 }
401 args = Py_BuildValue("(NN)", args, container);
402 if (args == NULL) {
403 return;
404 }
405 /* Container is now a borrowed reference; ignore it. */
406 self->in_callback = 1;
407 rv = call_with_frame("StartElement", __LINE__,
408 self->handlers[StartElement], args, self);
409 self->in_callback = 0;
410 Py_DECREF(args);
411 if (rv == NULL) {
412 flag_error(self);
413 return;
414 }
415 Py_DECREF(rv);
416 }
417 }
418
/* Define the C trampoline my_<NAME>Handler that forwards an Expat event to
 * the Python handler stored in slot NAME.
 *
 *   RC            C return type of the trampoline.
 *   NAME          HandlerTypes member; also used for the function name and
 *                 as the name passed to call_with_frame().
 *   PARAMS        Parenthesized C parameter list.
 *   INIT          Extra declarations placed at the top of the function.
 *   PARAM_FORMAT  Parenthesized argument list for Py_BuildValue(); it may
 *                 refer to `self` and to the names in PARAMS.
 *   CONVERSION    Statement(s) executed after a successful call; it may
 *                 use `rv`, the handler's result.
 *   RETURN        Expression returned on every exit path.
 *   GETUSERDATA   Expression yielding the xmlparseobject pointer.
 *
 * The generated function does nothing if no handler is set or a Python
 * exception is pending.  It flushes the character buffer first and calls
 * flag_error() if building the arguments or the handler call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
449
/* Trampoline returning void; the parser object is taken from the
   `userData` parameter, and the handler's result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline returning int; the handler's result is converted with
   PyLong_AsLong() and returned.  0 is returned when no handler is set or
   when the trampoline bails out before the conversion. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
458
/* EndElement(name): the element name is interned. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* ProcessingInstruction(target, data): the target is interned, the data is
   converted without interning. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* UnparsedEntityDecl(entityName, base, systemId, publicId, notationName):
   all five strings are interned; a null pointer is passed as None. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* EntityDecl(entityName, is_parameter_entity, value, base, systemId,
   publicId, notationName): `value` is not NUL-terminated, so it is
   converted using value_length; the other strings are interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XmlDecl(version, encoding, standalone): both strings are converted
   without interning; `standalone` is passed through as an int. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
506
507 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))508 conv_content_model(XML_Content * const model,
509 PyObject *(*conv_string)(const XML_Char *))
510 {
511 PyObject *result = NULL;
512 PyObject *children = PyTuple_New(model->numchildren);
513 int i;
514
515 if (children != NULL) {
516 assert(model->numchildren < INT_MAX);
517 for (i = 0; i < (int)model->numchildren; ++i) {
518 PyObject *child = conv_content_model(&model->children[i],
519 conv_string);
520 if (child == NULL) {
521 Py_XDECREF(children);
522 return NULL;
523 }
524 PyTuple_SET_ITEM(children, i, child);
525 }
526 result = Py_BuildValue("(iiO&N)",
527 model->type, model->quant,
528 conv_string,model->name, children);
529 }
530 return result;
531 }
532
533 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)534 my_ElementDeclHandler(void *userData,
535 const XML_Char *name,
536 XML_Content *model)
537 {
538 xmlparseobject *self = (xmlparseobject *)userData;
539 PyObject *args = NULL;
540
541 if (have_handler(self, ElementDecl)) {
542 PyObject *rv = NULL;
543 PyObject *modelobj, *nameobj;
544
545 if (PyErr_Occurred())
546 return;
547
548 if (flush_character_buffer(self) < 0)
549 goto finally;
550 modelobj = conv_content_model(model, (conv_string_to_unicode));
551 if (modelobj == NULL) {
552 flag_error(self);
553 goto finally;
554 }
555 nameobj = string_intern(self, name);
556 if (nameobj == NULL) {
557 Py_DECREF(modelobj);
558 flag_error(self);
559 goto finally;
560 }
561 args = Py_BuildValue("NN", nameobj, modelobj);
562 if (args == NULL) {
563 flag_error(self);
564 goto finally;
565 }
566 self->in_callback = 1;
567 rv = call_with_frame("ElementDecl", __LINE__,
568 self->handlers[ElementDecl], args, self);
569 self->in_callback = 0;
570 if (rv == NULL) {
571 flag_error(self);
572 goto finally;
573 }
574 Py_DECREF(rv);
575 }
576 finally:
577 Py_XDECREF(args);
578 XML_FreeContentModel(self->itself, model);
579 return;
580 }
581
/* AttlistDecl(elname, attname, att_type, dflt, isrequired): element and
   attribute names are interned, type and default are converted without
   interning. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity(entityName, is_parameter_entity); only compiled when
   XML_COMBINED_VERSION is at least 19504. */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* NotationDecl(notationName, base, systemId, publicId): all interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* StartNamespaceDecl(prefix, uri): both interned. */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* EndNamespaceDecl(prefix): interned. */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment(data): converted without interning. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* StartCdataSection(): called without arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* EndCdataSection(): called without arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default(data): `s` is not NUL-terminated, so `len` is used. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* DefaultHandlerExpand(data): same argument conversion as Default. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias without the "Handler" suffix that the macro appends. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
645
/* NotStandalone(): called without arguments; the handler's result is
   converted with PyLong_AsLong() and returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* ExternalEntityRef(context, base, systemId, publicId).  Expat passes the
   parser instead of the user data here, so the parser object is fetched
   with XML_GetUserData().  The handler's result is converted with
   PyLong_AsLong() and returned; 0 is returned when no handler is set or
   when the trampoline bails out before the conversion. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* StartDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset): the
   three strings are interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* EndDoctypeDecl(): called without arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
674
675 /* ---------------------------------------------------------------- */
676 /*[clinic input]
677 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
678 [clinic start generated code]*/
679 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
680
681
682 static PyObject *
get_parse_result(xmlparseobject * self,int rv)683 get_parse_result(xmlparseobject *self, int rv)
684 {
685 if (PyErr_Occurred()) {
686 return NULL;
687 }
688 if (rv == 0) {
689 return set_error(self, XML_GetErrorCode(self->itself));
690 }
691 if (flush_character_buffer(self) < 0) {
692 return NULL;
693 }
694 return PyLong_FromLong(rv);
695 }
696
/* Largest number of bytes passed to a single XML_Parse() call (1 MiB). */
#define MAX_CHUNK_SIZE (1024 * 1024)
698
699 /*[clinic input]
700 pyexpat.xmlparser.Parse
701
702 data: object
703 isfinal: bool(accept={int}) = False
704 /
705
706 Parse XML data.
707
708 `isfinal' should be true at end of input.
709 [clinic start generated code]*/
710
711 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyObject * data,int isfinal)712 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyObject *data,
713 int isfinal)
714 /*[clinic end generated code: output=f4db843dd1f4ed4b input=eb616027bfa9847f]*/
715 {
716 const char *s;
717 Py_ssize_t slen;
718 Py_buffer view;
719 int rc;
720
721 if (PyUnicode_Check(data)) {
722 view.buf = NULL;
723 s = PyUnicode_AsUTF8AndSize(data, &slen);
724 if (s == NULL)
725 return NULL;
726 /* Explicitly set UTF-8 encoding. Return code ignored. */
727 (void)XML_SetEncoding(self->itself, "utf-8");
728 }
729 else {
730 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
731 return NULL;
732 s = view.buf;
733 slen = view.len;
734 }
735
736 while (slen > MAX_CHUNK_SIZE) {
737 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
738 if (!rc)
739 goto done;
740 s += MAX_CHUNK_SIZE;
741 slen -= MAX_CHUNK_SIZE;
742 }
743 Py_BUILD_ASSERT(MAX_CHUNK_SIZE <= INT_MAX);
744 assert(slen <= INT_MAX);
745 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
746
747 done:
748 if (view.buf != NULL)
749 PyBuffer_Release(&view);
750 return get_parse_result(self, rc);
751 }
752
753 /* File reading copied from cPickle */
754
755 #define BUF_SIZE 2048
756
757 static int
readinst(char * buf,int buf_size,PyObject * meth)758 readinst(char *buf, int buf_size, PyObject *meth)
759 {
760 PyObject *str;
761 Py_ssize_t len;
762 const char *ptr;
763
764 str = PyObject_CallFunction(meth, "n", buf_size);
765 if (str == NULL)
766 goto error;
767
768 if (PyBytes_Check(str))
769 ptr = PyBytes_AS_STRING(str);
770 else if (PyByteArray_Check(str))
771 ptr = PyByteArray_AS_STRING(str);
772 else {
773 PyErr_Format(PyExc_TypeError,
774 "read() did not return a bytes object (type=%.400s)",
775 Py_TYPE(str)->tp_name);
776 goto error;
777 }
778 len = Py_SIZE(str);
779 if (len > buf_size) {
780 PyErr_Format(PyExc_ValueError,
781 "read() returned too much data: "
782 "%i bytes requested, %zd returned",
783 buf_size, len);
784 goto error;
785 }
786 memcpy(buf, ptr, len);
787 Py_DECREF(str);
788 /* len <= buf_size <= INT_MAX */
789 return (int)len;
790
791 error:
792 Py_XDECREF(str);
793 return -1;
794 }
795
796 /*[clinic input]
797 pyexpat.xmlparser.ParseFile
798
799 file: object
800 /
801
802 Parse XML data from file-like object.
803 [clinic start generated code]*/
804
805 static PyObject *
pyexpat_xmlparser_ParseFile(xmlparseobject * self,PyObject * file)806 pyexpat_xmlparser_ParseFile(xmlparseobject *self, PyObject *file)
807 /*[clinic end generated code: output=2adc6a13100cc42b input=fbb5a12b6038d735]*/
808 {
809 int rv = 1;
810 PyObject *readmethod = NULL;
811 _Py_IDENTIFIER(read);
812
813 if (_PyObject_LookupAttrId(file, &PyId_read, &readmethod) < 0) {
814 return NULL;
815 }
816 if (readmethod == NULL) {
817 PyErr_SetString(PyExc_TypeError,
818 "argument must have 'read' attribute");
819 return NULL;
820 }
821 for (;;) {
822 int bytes_read;
823 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
824 if (buf == NULL) {
825 Py_XDECREF(readmethod);
826 return get_parse_result(self, 0);
827 }
828
829 bytes_read = readinst(buf, BUF_SIZE, readmethod);
830 if (bytes_read < 0) {
831 Py_DECREF(readmethod);
832 return NULL;
833 }
834 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
835 if (PyErr_Occurred()) {
836 Py_XDECREF(readmethod);
837 return NULL;
838 }
839
840 if (!rv || bytes_read == 0)
841 break;
842 }
843 Py_XDECREF(readmethod);
844 return get_parse_result(self, rv);
845 }
846
847 /*[clinic input]
848 pyexpat.xmlparser.SetBase
849
850 base: str
851 /
852
853 Set the base URL for the parser.
854 [clinic start generated code]*/
855
856 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)857 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
858 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
859 {
860 if (!XML_SetBase(self->itself, base)) {
861 return PyErr_NoMemory();
862 }
863 Py_RETURN_NONE;
864 }
865
866 /*[clinic input]
867 pyexpat.xmlparser.GetBase
868
869 Return base URL string for the parser.
870 [clinic start generated code]*/
871
872 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)873 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
874 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
875 {
876 return Py_BuildValue("z", XML_GetBase(self->itself));
877 }
878
879 /*[clinic input]
880 pyexpat.xmlparser.GetInputContext
881
882 Return the untranslated text of the input that caused the current event.
883
884 If the event was generated by a large amount of text (such as a start tag
885 for an element with many attributes), not all of the text may be available.
886 [clinic start generated code]*/
887
888 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)889 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
890 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
891 {
892 if (self->in_callback) {
893 int offset, size;
894 const char *buffer
895 = XML_GetInputContext(self->itself, &offset, &size);
896
897 if (buffer != NULL)
898 return PyBytes_FromStringAndSize(buffer + offset,
899 size - offset);
900 else
901 Py_RETURN_NONE;
902 }
903 else
904 Py_RETURN_NONE;
905 }
906
907 /*[clinic input]
908 pyexpat.xmlparser.ExternalEntityParserCreate
909
910 context: str(accept={str, NoneType})
911 encoding: str = NULL
912 /
913
914 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
915 [clinic start generated code]*/
916
917 static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject * self,const char * context,const char * encoding)918 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
919 const char *context,
920 const char *encoding)
921 /*[clinic end generated code: output=535cda9d7a0fbcd6 input=b906714cc122c322]*/
922 {
923 xmlparseobject *new_parser;
924 int i;
925
926 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
927 if (new_parser == NULL)
928 return NULL;
929 new_parser->buffer_size = self->buffer_size;
930 new_parser->buffer_used = 0;
931 new_parser->buffer = NULL;
932 new_parser->ordered_attributes = self->ordered_attributes;
933 new_parser->specified_attributes = self->specified_attributes;
934 new_parser->in_callback = 0;
935 new_parser->ns_prefixes = self->ns_prefixes;
936 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
937 encoding);
938 new_parser->handlers = 0;
939 new_parser->intern = self->intern;
940 Py_XINCREF(new_parser->intern);
941 PyObject_GC_Track(new_parser);
942
943 if (self->buffer != NULL) {
944 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
945 if (new_parser->buffer == NULL) {
946 Py_DECREF(new_parser);
947 return PyErr_NoMemory();
948 }
949 }
950 if (!new_parser->itself) {
951 Py_DECREF(new_parser);
952 return PyErr_NoMemory();
953 }
954
955 XML_SetUserData(new_parser->itself, (void *)new_parser);
956
957 /* allocate and clear handlers first */
958 for (i = 0; handler_info[i].name != NULL; i++)
959 /* do nothing */;
960
961 new_parser->handlers = PyMem_New(PyObject *, i);
962 if (!new_parser->handlers) {
963 Py_DECREF(new_parser);
964 return PyErr_NoMemory();
965 }
966 clear_handlers(new_parser, 1);
967
968 /* then copy handlers from self */
969 for (i = 0; handler_info[i].name != NULL; i++) {
970 PyObject *handler = self->handlers[i];
971 if (handler != NULL) {
972 Py_INCREF(handler);
973 new_parser->handlers[i] = handler;
974 handler_info[i].setter(new_parser->itself,
975 handler_info[i].handler);
976 }
977 }
978 return (PyObject *)new_parser;
979 }
980
981 /*[clinic input]
982 pyexpat.xmlparser.SetParamEntityParsing
983
984 flag: int
985 /
986
987 Controls parsing of parameter entities (including the external DTD subset).
988
989 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
990 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
991 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
992 was successful.
993 [clinic start generated code]*/
994
995 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)996 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
997 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
998 {
999 flag = XML_SetParamEntityParsing(self->itself, flag);
1000 return PyLong_FromLong(flag);
1001 }
1002
1003
1004 #if XML_COMBINED_VERSION >= 19505
1005 /*[clinic input]
1006 pyexpat.xmlparser.UseForeignDTD
1007
1008 flag: bool = True
1009 /
1010
1011 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1012
1013 This readily allows the use of a 'default' document type controlled by the
1014 application, while still getting the advantage of providing document type
1015 information to the parser. 'flag' defaults to True if not provided.
1016 [clinic start generated code]*/
1017
1018 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,int flag)1019 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, int flag)
1020 /*[clinic end generated code: output=cfaa9aa50bb0f65c input=78144c519d116a6e]*/
1021 {
1022 enum XML_Error rc;
1023
1024 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1025 if (rc != XML_ERROR_NONE) {
1026 return set_error(self, rc);
1027 }
1028 Py_RETURN_NONE;
1029 }
1030 #endif
1031
/* Method table of xmlparser objects.  Each *_METHODDEF macro comes from the
   Argument Clinic generated header and expands to one table entry.
   UseForeignDTD is only listed when XML_COMBINED_VERSION is at least
   19505, matching the conditional definition of its implementation. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1045
1046 /* ---------- */
1047
1048
1049
1050 /* pyexpat international encoding support.
1051 Make it as simple as possible.
1052 */
1053
1054 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1055 PyUnknownEncodingHandler(void *encodingHandlerData,
1056 const XML_Char *name,
1057 XML_Encoding *info)
1058 {
1059 static unsigned char template_buffer[256] = {0};
1060 PyObject* u;
1061 int i;
1062 void *data;
1063 unsigned int kind;
1064
1065 if (PyErr_Occurred())
1066 return XML_STATUS_ERROR;
1067
1068 if (template_buffer[1] == 0) {
1069 for (i = 0; i < 256; i++)
1070 template_buffer[i] = i;
1071 }
1072
1073 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1074 if (u == NULL || PyUnicode_READY(u)) {
1075 Py_XDECREF(u);
1076 return XML_STATUS_ERROR;
1077 }
1078
1079 if (PyUnicode_GET_LENGTH(u) != 256) {
1080 Py_DECREF(u);
1081 PyErr_SetString(PyExc_ValueError,
1082 "multi-byte encodings are not supported");
1083 return XML_STATUS_ERROR;
1084 }
1085
1086 kind = PyUnicode_KIND(u);
1087 data = PyUnicode_DATA(u);
1088 for (i = 0; i < 256; i++) {
1089 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1090 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1091 info->map[i] = ch;
1092 else
1093 info->map[i] = -1;
1094 }
1095
1096 info->data = NULL;
1097 info->convert = NULL;
1098 info->release = NULL;
1099 Py_DECREF(u);
1100
1101 return XML_STATUS_OK;
1102 }
1103
1104
1105 static PyObject *
newxmlparseobject(const char * encoding,const char * namespace_separator,PyObject * intern)1106 newxmlparseobject(const char *encoding, const char *namespace_separator, PyObject *intern)
1107 {
1108 int i;
1109 xmlparseobject *self;
1110
1111 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1112 if (self == NULL)
1113 return NULL;
1114
1115 self->buffer = NULL;
1116 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1117 self->buffer_used = 0;
1118 self->ordered_attributes = 0;
1119 self->specified_attributes = 0;
1120 self->in_callback = 0;
1121 self->ns_prefixes = 0;
1122 self->handlers = NULL;
1123 self->intern = intern;
1124 Py_XINCREF(self->intern);
1125 PyObject_GC_Track(self);
1126
1127 /* namespace_separator is either NULL or contains one char + \0 */
1128 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1129 namespace_separator);
1130 if (self->itself == NULL) {
1131 PyErr_SetString(PyExc_RuntimeError,
1132 "XML_ParserCreate failed");
1133 Py_DECREF(self);
1134 return NULL;
1135 }
1136 #if XML_COMBINED_VERSION >= 20100
1137 /* This feature was added upstream in libexpat 2.1.0. */
1138 XML_SetHashSalt(self->itself,
1139 (unsigned long)_Py_HashSecret.expat.hashsalt);
1140 #endif
1141 XML_SetUserData(self->itself, (void *)self);
1142 XML_SetUnknownEncodingHandler(self->itself,
1143 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1144
1145 for (i = 0; handler_info[i].name != NULL; i++)
1146 /* do nothing */;
1147
1148 self->handlers = PyMem_New(PyObject *, i);
1149 if (!self->handlers) {
1150 Py_DECREF(self);
1151 return PyErr_NoMemory();
1152 }
1153 clear_handlers(self, 1);
1154
1155 return (PyObject*)self;
1156 }
1157
1158
1159 static void
xmlparse_dealloc(xmlparseobject * self)1160 xmlparse_dealloc(xmlparseobject *self)
1161 {
1162 int i;
1163 PyObject_GC_UnTrack(self);
1164 if (self->itself != NULL)
1165 XML_ParserFree(self->itself);
1166 self->itself = NULL;
1167
1168 if (self->handlers != NULL) {
1169 for (i = 0; handler_info[i].name != NULL; i++)
1170 Py_CLEAR(self->handlers[i]);
1171 PyMem_Free(self->handlers);
1172 self->handlers = NULL;
1173 }
1174 if (self->buffer != NULL) {
1175 PyMem_Free(self->buffer);
1176 self->buffer = NULL;
1177 }
1178 Py_XDECREF(self->intern);
1179 PyObject_GC_Del(self);
1180 }
1181
1182
1183 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1184 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1185 {
1186 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1187 int handlernum = (int)(hi - handler_info);
1188 PyObject *result = self->handlers[handlernum];
1189 if (result == NULL)
1190 result = Py_None;
1191 Py_INCREF(result);
1192 return result;
1193 }
1194
1195 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1196 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1197 {
1198 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1199 int handlernum = (int)(hi - handler_info);
1200 if (v == NULL) {
1201 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1202 return -1;
1203 }
1204 if (handlernum == CharacterData) {
1205 /* If we're changing the character data handler, flush all
1206 * cached data with the old handler. Not sure there's a
1207 * "right" thing to do, though, but this probably won't
1208 * happen.
1209 */
1210 if (flush_character_buffer(self) < 0)
1211 return -1;
1212 }
1213
1214 xmlhandler c_handler = NULL;
1215 if (v == Py_None) {
1216 /* If this is the character data handler, and a character
1217 data handler is already active, we need to be more
1218 careful. What we can safely do is replace the existing
1219 character data handler callback function with a no-op
1220 function that will refuse to call Python. The downside
1221 is that this doesn't completely remove the character
1222 data handler from the C layer if there's any callback
1223 active, so Expat does a little more work than it
1224 otherwise would, but that's really an odd case. A more
1225 elaborate system of handlers and state could remove the
1226 C handler more effectively. */
1227 if (handlernum == CharacterData && self->in_callback)
1228 c_handler = noop_character_data_handler;
1229 v = NULL;
1230 }
1231 else if (v != NULL) {
1232 Py_INCREF(v);
1233 c_handler = handler_info[handlernum].handler;
1234 }
1235 Py_XSETREF(self->handlers[handlernum], v);
1236 handler_info[handlernum].setter(self->itself, c_handler);
1237 return 0;
1238 }
1239
1240 #define INT_GETTER(name) \
1241 static PyObject * \
1242 xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1243 { \
1244 return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1245 }
1246 INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)1247 INT_GETTER(ErrorLineNumber)
1248 INT_GETTER(ErrorColumnNumber)
1249 INT_GETTER(ErrorByteIndex)
1250 INT_GETTER(CurrentLineNumber)
1251 INT_GETTER(CurrentColumnNumber)
1252 INT_GETTER(CurrentByteIndex)
1253
1254 #undef INT_GETTER
1255
1256 static PyObject *
1257 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1258 {
1259 return PyBool_FromLong(self->buffer != NULL);
1260 }
1261
1262 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1263 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1264 {
1265 if (v == NULL) {
1266 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1267 return -1;
1268 }
1269 int b = PyObject_IsTrue(v);
1270 if (b < 0)
1271 return -1;
1272 if (b) {
1273 if (self->buffer == NULL) {
1274 self->buffer = PyMem_Malloc(self->buffer_size);
1275 if (self->buffer == NULL) {
1276 PyErr_NoMemory();
1277 return -1;
1278 }
1279 self->buffer_used = 0;
1280 }
1281 }
1282 else if (self->buffer != NULL) {
1283 if (flush_character_buffer(self) < 0)
1284 return -1;
1285 PyMem_Free(self->buffer);
1286 self->buffer = NULL;
1287 }
1288 return 0;
1289 }
1290
1291 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1292 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1293 {
1294 return PyLong_FromLong((long) self->buffer_size);
1295 }
1296
1297 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1298 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1299 {
1300 if (v == NULL) {
1301 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1302 return -1;
1303 }
1304 long new_buffer_size;
1305 if (!PyLong_Check(v)) {
1306 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1307 return -1;
1308 }
1309
1310 new_buffer_size = PyLong_AsLong(v);
1311 if (new_buffer_size <= 0) {
1312 if (!PyErr_Occurred())
1313 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1314 return -1;
1315 }
1316
1317 /* trivial case -- no change */
1318 if (new_buffer_size == self->buffer_size) {
1319 return 0;
1320 }
1321
1322 /* check maximum */
1323 if (new_buffer_size > INT_MAX) {
1324 char errmsg[100];
1325 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1326 PyErr_SetString(PyExc_ValueError, errmsg);
1327 return -1;
1328 }
1329
1330 if (self->buffer != NULL) {
1331 /* there is already a buffer */
1332 if (self->buffer_used != 0) {
1333 if (flush_character_buffer(self) < 0) {
1334 return -1;
1335 }
1336 }
1337 /* free existing buffer */
1338 PyMem_Free(self->buffer);
1339 }
1340 self->buffer = PyMem_Malloc(new_buffer_size);
1341 if (self->buffer == NULL) {
1342 PyErr_NoMemory();
1343 return -1;
1344 }
1345 self->buffer_size = new_buffer_size;
1346 return 0;
1347 }
1348
1349 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1350 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1351 {
1352 return PyLong_FromLong((long) self->buffer_used);
1353 }
1354
1355 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1356 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1357 {
1358 return PyBool_FromLong(self->ns_prefixes);
1359 }
1360
1361 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1362 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1363 {
1364 if (v == NULL) {
1365 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1366 return -1;
1367 }
1368 int b = PyObject_IsTrue(v);
1369 if (b < 0)
1370 return -1;
1371 self->ns_prefixes = b;
1372 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1373 return 0;
1374 }
1375
1376 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1377 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1378 {
1379 return PyBool_FromLong(self->ordered_attributes);
1380 }
1381
1382 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1383 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1384 {
1385 if (v == NULL) {
1386 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1387 return -1;
1388 }
1389 int b = PyObject_IsTrue(v);
1390 if (b < 0)
1391 return -1;
1392 self->ordered_attributes = b;
1393 return 0;
1394 }
1395
1396 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1397 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1398 {
1399 return PyBool_FromLong((long) self->specified_attributes);
1400 }
1401
1402 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1403 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1404 {
1405 if (v == NULL) {
1406 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1407 return -1;
1408 }
1409 int b = PyObject_IsTrue(v);
1410 if (b < 0)
1411 return -1;
1412 self->specified_attributes = b;
1413 return 0;
1414 }
1415
1416 static PyMemberDef xmlparse_members[] = {
1417 {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1418 {NULL}
1419 };
1420
/* Table-entry helpers for xmlparse_getsetlist.  Both expand to one
   PyGetSetDef initializer (trailing comma included) that wires attribute
   `name` to xmlparse_<name>_getter and, for the second form, also to
   xmlparse_<name>_setter.  Doc string and closure are left NULL. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of xmlparser objects.  Entries without a setter are
   read-only.  The handler attributes are not listed here; they are added to
   the type dict by init_handler_descrs(). */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},  /* sentinel */
};

#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1446
1447 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1448 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1449 {
1450 int i;
1451 for (i = 0; handler_info[i].name != NULL; i++)
1452 Py_VISIT(op->handlers[i]);
1453 return 0;
1454 }
1455
1456 static int
xmlparse_clear(xmlparseobject * op)1457 xmlparse_clear(xmlparseobject *op)
1458 {
1459 clear_handlers(op, 0);
1460 Py_CLEAR(op->intern);
1461 return 0;
1462 }
1463
1464 PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1465
1466 static PyTypeObject Xmlparsetype = {
1467 PyVarObject_HEAD_INIT(NULL, 0)
1468 "pyexpat.xmlparser", /*tp_name*/
1469 sizeof(xmlparseobject), /*tp_basicsize*/
1470 0, /*tp_itemsize*/
1471 /* methods */
1472 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1473 0, /*tp_vectorcall_offset*/
1474 0, /*tp_getattr*/
1475 0, /*tp_setattr*/
1476 0, /*tp_as_async*/
1477 (reprfunc)0, /*tp_repr*/
1478 0, /*tp_as_number*/
1479 0, /*tp_as_sequence*/
1480 0, /*tp_as_mapping*/
1481 (hashfunc)0, /*tp_hash*/
1482 (ternaryfunc)0, /*tp_call*/
1483 (reprfunc)0, /*tp_str*/
1484 (getattrofunc)0, /* tp_getattro */
1485 (setattrofunc)0, /* tp_setattro */
1486 0, /* tp_as_buffer */
1487 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1488 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1489 (traverseproc)xmlparse_traverse, /* tp_traverse */
1490 (inquiry)xmlparse_clear, /* tp_clear */
1491 0, /* tp_richcompare */
1492 0, /* tp_weaklistoffset */
1493 0, /* tp_iter */
1494 0, /* tp_iternext */
1495 xmlparse_methods, /* tp_methods */
1496 xmlparse_members, /* tp_members */
1497 xmlparse_getsetlist, /* tp_getset */
1498 };
1499
1500 /* End of code for xmlparser objects */
1501 /* -------------------------------------------------------- */
1502
1503 /*[clinic input]
1504 pyexpat.ParserCreate
1505
1506 encoding: str(accept={str, NoneType}) = None
1507 namespace_separator: str(accept={str, NoneType}) = None
1508 intern: object = NULL
1509
1510 Return a new XML parser object.
1511 [clinic start generated code]*/
1512
1513 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1514 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1515 const char *namespace_separator, PyObject *intern)
1516 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1517 {
1518 PyObject *result;
1519 int intern_decref = 0;
1520
1521 if (namespace_separator != NULL
1522 && strlen(namespace_separator) > 1) {
1523 PyErr_SetString(PyExc_ValueError,
1524 "namespace_separator must be at most one"
1525 " character, omitted, or None");
1526 return NULL;
1527 }
1528 /* Explicitly passing None means no interning is desired.
1529 Not passing anything means that a new dictionary is used. */
1530 if (intern == Py_None)
1531 intern = NULL;
1532 else if (intern == NULL) {
1533 intern = PyDict_New();
1534 if (!intern)
1535 return NULL;
1536 intern_decref = 1;
1537 }
1538 else if (!PyDict_Check(intern)) {
1539 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1540 return NULL;
1541 }
1542
1543 result = newxmlparseobject(encoding, namespace_separator, intern);
1544 if (intern_decref) {
1545 Py_DECREF(intern);
1546 }
1547 return result;
1548 }
1549
1550 /*[clinic input]
1551 pyexpat.ErrorString
1552
1553 code: long
1554 /
1555
1556 Returns string error for given number.
1557 [clinic start generated code]*/
1558
1559 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1560 pyexpat_ErrorString_impl(PyObject *module, long code)
1561 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1562 {
1563 return Py_BuildValue("z", XML_ErrorString((int)code));
1564 }
1565
1566 /* List of methods defined in the module */
1567
/* Module-level functions; each *_METHODDEF macro supplies one complete
   table entry for the corresponding pyexpat_*_impl function in this file. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1573
1574 /* Module docstring */
1575
/* Docstring installed on the module through the pyexpatmodule definition. */
PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");

/* Initialization function for the module */

/* MODULE_NAME and MODULE_INITFUNC may be predefined before this point to
   build the module under another name; these are the defaults.  MODULE_NAME
   is also the prefix of the ".errors" and ".model" submodule names. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif

#ifndef MODULE_INITFUNC
#define MODULE_INITFUNC PyInit_pyexpat
#endif
1588
1589 static struct PyModuleDef pyexpatmodule = {
1590 PyModuleDef_HEAD_INIT,
1591 MODULE_NAME,
1592 pyexpat_module_documentation,
1593 -1,
1594 pyexpat_methods,
1595 NULL,
1596 NULL,
1597 NULL,
1598 NULL
1599 };
1600
init_handler_descrs(void)1601 static int init_handler_descrs(void)
1602 {
1603 int i;
1604 assert(!PyType_HasFeature(&Xmlparsetype, Py_TPFLAGS_VALID_VERSION_TAG));
1605 for (i = 0; handler_info[i].name != NULL; i++) {
1606 struct HandlerInfo *hi = &handler_info[i];
1607 hi->getset.name = hi->name;
1608 hi->getset.get = (getter)xmlparse_handler_getter;
1609 hi->getset.set = (setter)xmlparse_handler_setter;
1610 hi->getset.closure = &handler_info[i];
1611
1612 PyObject *descr = PyDescr_NewGetSet(&Xmlparsetype, &hi->getset);
1613 if (descr == NULL)
1614 return -1;
1615
1616 if (PyDict_GetItemWithError(Xmlparsetype.tp_dict, PyDescr_NAME(descr))) {
1617 Py_DECREF(descr);
1618 continue;
1619 }
1620 else if (PyErr_Occurred()) {
1621 Py_DECREF(descr);
1622 return -1;
1623 }
1624 if (PyDict_SetItem(Xmlparsetype.tp_dict, PyDescr_NAME(descr), descr) < 0) {
1625 Py_DECREF(descr);
1626 return -1;
1627 }
1628 Py_DECREF(descr);
1629 }
1630 return 0;
1631 }
1632
1633 PyMODINIT_FUNC
MODULE_INITFUNC(void)1634 MODULE_INITFUNC(void)
1635 {
1636 PyObject *m, *d;
1637 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
1638 PyObject *errors_module;
1639 PyObject *modelmod_name;
1640 PyObject *model_module;
1641 PyObject *tmpnum, *tmpstr;
1642 PyObject *codes_dict;
1643 PyObject *rev_codes_dict;
1644 int res;
1645 static struct PyExpat_CAPI capi;
1646 PyObject *capi_object;
1647
1648 if (errmod_name == NULL)
1649 return NULL;
1650 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
1651 if (modelmod_name == NULL)
1652 return NULL;
1653
1654 if (PyType_Ready(&Xmlparsetype) < 0 || init_handler_descrs() < 0)
1655 return NULL;
1656
1657 /* Create the module and add the functions */
1658 m = PyModule_Create(&pyexpatmodule);
1659 if (m == NULL)
1660 return NULL;
1661
1662 /* Add some symbolic constants to the module */
1663 if (ErrorObject == NULL) {
1664 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
1665 NULL, NULL);
1666 if (ErrorObject == NULL)
1667 return NULL;
1668 }
1669 Py_INCREF(ErrorObject);
1670 PyModule_AddObject(m, "error", ErrorObject);
1671 Py_INCREF(ErrorObject);
1672 PyModule_AddObject(m, "ExpatError", ErrorObject);
1673 Py_INCREF(&Xmlparsetype);
1674 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
1675
1676 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1677 XML_ExpatVersion());
1678 {
1679 XML_Expat_Version info = XML_ExpatVersionInfo();
1680 PyModule_AddObject(m, "version_info",
1681 Py_BuildValue("(iii)", info.major,
1682 info.minor, info.micro));
1683 }
1684 /* XXX When Expat supports some way of figuring out how it was
1685 compiled, this should check and set native_encoding
1686 appropriately.
1687 */
1688 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
1689
1690 d = PyModule_GetDict(m);
1691 if (d == NULL) {
1692 Py_DECREF(m);
1693 return NULL;
1694 }
1695 errors_module = PyDict_GetItemWithError(d, errmod_name);
1696 if (errors_module == NULL && !PyErr_Occurred()) {
1697 errors_module = PyModule_New(MODULE_NAME ".errors");
1698 if (errors_module != NULL) {
1699 _PyImport_SetModule(errmod_name, errors_module);
1700 /* gives away the reference to errors_module */
1701 PyModule_AddObject(m, "errors", errors_module);
1702 }
1703 }
1704 Py_DECREF(errmod_name);
1705 model_module = PyDict_GetItemWithError(d, modelmod_name);
1706 if (model_module == NULL && !PyErr_Occurred()) {
1707 model_module = PyModule_New(MODULE_NAME ".model");
1708 if (model_module != NULL) {
1709 _PyImport_SetModule(modelmod_name, model_module);
1710 /* gives away the reference to model_module */
1711 PyModule_AddObject(m, "model", model_module);
1712 }
1713 }
1714 Py_DECREF(modelmod_name);
1715 if (errors_module == NULL || model_module == NULL) {
1716 /* Don't core dump later! */
1717 Py_DECREF(m);
1718 return NULL;
1719 }
1720
1721 #if XML_COMBINED_VERSION > 19505
1722 {
1723 const XML_Feature *features = XML_GetFeatureList();
1724 PyObject *list = PyList_New(0);
1725 if (list == NULL)
1726 /* just ignore it */
1727 PyErr_Clear();
1728 else {
1729 int i = 0;
1730 for (; features[i].feature != XML_FEATURE_END; ++i) {
1731 int ok;
1732 PyObject *item = Py_BuildValue("si", features[i].name,
1733 features[i].value);
1734 if (item == NULL) {
1735 Py_DECREF(list);
1736 list = NULL;
1737 break;
1738 }
1739 ok = PyList_Append(list, item);
1740 Py_DECREF(item);
1741 if (ok < 0) {
1742 PyErr_Clear();
1743 break;
1744 }
1745 }
1746 if (list != NULL)
1747 PyModule_AddObject(m, "features", list);
1748 }
1749 }
1750 #endif
1751
1752 codes_dict = PyDict_New();
1753 rev_codes_dict = PyDict_New();
1754 if (codes_dict == NULL || rev_codes_dict == NULL) {
1755 Py_XDECREF(codes_dict);
1756 Py_XDECREF(rev_codes_dict);
1757 return NULL;
1758 }
1759
1760 #define MYCONST(name) \
1761 if (PyModule_AddStringConstant(errors_module, #name, \
1762 XML_ErrorString(name)) < 0) \
1763 return NULL; \
1764 tmpnum = PyLong_FromLong(name); \
1765 if (tmpnum == NULL) return NULL; \
1766 res = PyDict_SetItemString(codes_dict, \
1767 XML_ErrorString(name), tmpnum); \
1768 if (res < 0) return NULL; \
1769 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1770 if (tmpstr == NULL) return NULL; \
1771 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1772 Py_DECREF(tmpstr); \
1773 Py_DECREF(tmpnum); \
1774 if (res < 0) return NULL; \
1775
1776 MYCONST(XML_ERROR_NO_MEMORY);
1777 MYCONST(XML_ERROR_SYNTAX);
1778 MYCONST(XML_ERROR_NO_ELEMENTS);
1779 MYCONST(XML_ERROR_INVALID_TOKEN);
1780 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1781 MYCONST(XML_ERROR_PARTIAL_CHAR);
1782 MYCONST(XML_ERROR_TAG_MISMATCH);
1783 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1784 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1785 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1786 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1787 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1788 MYCONST(XML_ERROR_ASYNC_ENTITY);
1789 MYCONST(XML_ERROR_BAD_CHAR_REF);
1790 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1791 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1792 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1793 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1794 MYCONST(XML_ERROR_INCORRECT_ENCODING);
1795 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1796 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1797 MYCONST(XML_ERROR_NOT_STANDALONE);
1798 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1799 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1800 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1801 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1802 /* Added in Expat 1.95.7. */
1803 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1804 /* Added in Expat 1.95.8. */
1805 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1806 MYCONST(XML_ERROR_INCOMPLETE_PE);
1807 MYCONST(XML_ERROR_XML_DECL);
1808 MYCONST(XML_ERROR_TEXT_DECL);
1809 MYCONST(XML_ERROR_PUBLICID);
1810 MYCONST(XML_ERROR_SUSPENDED);
1811 MYCONST(XML_ERROR_NOT_SUSPENDED);
1812 MYCONST(XML_ERROR_ABORTED);
1813 MYCONST(XML_ERROR_FINISHED);
1814 MYCONST(XML_ERROR_SUSPEND_PE);
1815
1816 if (PyModule_AddStringConstant(errors_module, "__doc__",
1817 "Constants used to describe "
1818 "error conditions.") < 0)
1819 return NULL;
1820
1821 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1822 return NULL;
1823 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1824 return NULL;
1825
1826 #undef MYCONST
1827
1828 #define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
1829 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1830 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1831 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1832 #undef MYCONST
1833
1834 #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1835 PyModule_AddStringConstant(model_module, "__doc__",
1836 "Constants used to interpret content model information.");
1837
1838 MYCONST(XML_CTYPE_EMPTY);
1839 MYCONST(XML_CTYPE_ANY);
1840 MYCONST(XML_CTYPE_MIXED);
1841 MYCONST(XML_CTYPE_NAME);
1842 MYCONST(XML_CTYPE_CHOICE);
1843 MYCONST(XML_CTYPE_SEQ);
1844
1845 MYCONST(XML_CQUANT_NONE);
1846 MYCONST(XML_CQUANT_OPT);
1847 MYCONST(XML_CQUANT_REP);
1848 MYCONST(XML_CQUANT_PLUS);
1849 #undef MYCONST
1850
1851 /* initialize pyexpat dispatch table */
1852 capi.size = sizeof(capi);
1853 capi.magic = PyExpat_CAPI_MAGIC;
1854 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1855 capi.MINOR_VERSION = XML_MINOR_VERSION;
1856 capi.MICRO_VERSION = XML_MICRO_VERSION;
1857 capi.ErrorString = XML_ErrorString;
1858 capi.GetErrorCode = XML_GetErrorCode;
1859 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1860 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
1861 capi.Parse = XML_Parse;
1862 capi.ParserCreate_MM = XML_ParserCreate_MM;
1863 capi.ParserFree = XML_ParserFree;
1864 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1865 capi.SetCommentHandler = XML_SetCommentHandler;
1866 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1867 capi.SetElementHandler = XML_SetElementHandler;
1868 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1869 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1870 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1871 capi.SetUserData = XML_SetUserData;
1872 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1873 capi.SetEncoding = XML_SetEncoding;
1874 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
1875 #if XML_COMBINED_VERSION >= 20100
1876 capi.SetHashSalt = XML_SetHashSalt;
1877 #else
1878 capi.SetHashSalt = NULL;
1879 #endif
1880
1881 /* export using capsule */
1882 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
1883 if (capi_object)
1884 PyModule_AddObject(m, "expat_CAPI", capi_object);
1885 return m;
1886 }
1887
1888 static void
clear_handlers(xmlparseobject * self,int initial)1889 clear_handlers(xmlparseobject *self, int initial)
1890 {
1891 int i = 0;
1892
1893 for (; handler_info[i].name != NULL; i++) {
1894 if (initial)
1895 self->handlers[i] = NULL;
1896 else {
1897 Py_CLEAR(self->handlers[i]);
1898 handler_info[i].setter(self->itself, NULL);
1899 }
1900 }
1901 }
1902
/* Table of all handler attributes.  Each HANDLER_INFO(X) entry records the
   attribute name "X", the Expat registration function XML_SetX and the C
   callback my_X that is installed when a Python callable is assigned.

   An entry's position is its index into xmlparseobject.handlers, and
   xmlparse_handler_setter() compares that index with the CharacterData
   enumerator, so the order here has to follow enum HandlerTypes.  The
   NULL-name sentinel ends every loop over this table. */
static struct HandlerInfo handler_info[] = {

#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat headers are 1.95.4 or newer, matching the
       conditional SkippedEntity enumerator. */
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL} /* sentinel */
};
1937