1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * This is a big file: the parser core is around 7000 lines, followed by
 * the progressive parser top routines, then the various high level APIs
 * to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file's size.
 * As much as possible, the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c.
 * The DOM tree is built from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and are called
 * either from the SAX callbacks or as standalone functions using a
 * preparsed document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
/* Directory separator character: backslash when _WIN32 is defined, slash otherwise. */
#ifdef _WIN32
#  define XML_DIR_SEP '\\'
#else
#  define XML_DIR_SEP '/'
#endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
72 #endif
73
74 #include "buf.h"
75 #include "enc.h"
76
77 struct _xmlStartTag {
78 const xmlChar *prefix;
79 const xmlChar *URI;
80 int line;
81 int nsNr;
82 };
83
84 static void
85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89 const xmlChar *base, xmlParserCtxtPtr pctx);
90
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92
93 static int
94 xmlParseElementStart(xmlParserCtxtPtr ctxt);
95
96 static void
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
98
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * Entity size thresholds. xmlParserEntityCheck never flags an entity
 * whose replacement size is below XML_PARSER_BIG_ENTITY.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold on the ratio between the amount
 * of parsed entity replacement and the size in bytes of the input; above
 * it the expansion is considered to show an exponential behaviour.
 * A value of 10 corresponds to at least 3 entity replacements per byte
 * of input.
 */
#define XML_PARSER_NON_LINEAR 10
117
118 /*
119 * xmlParserEntityCheck
120 *
121 * Function to check non-linear entity expansion behaviour
122 * This is here to detect and stop exponential linear entity expansion
123 * This is not a limitation of the parser but a safety
124 * boundary feature. It can be disabled with the XML_PARSE_HUGE
125 * parser option.
126 */
127 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)128 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
129 xmlEntityPtr ent, size_t replacement)
130 {
131 size_t consumed = 0;
132 int i;
133
134 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135 return (0);
136 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137 return (1);
138
139 /*
140 * This may look absurd but is needed to detect
141 * entities problems
142 */
143 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144 (ent->content != NULL) && (ent->checked == 0) &&
145 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146 unsigned long oldnbent = ctxt->nbentities, diff;
147 xmlChar *rep;
148
149 ent->checked = 1;
150
151 ++ctxt->depth;
152 rep = xmlStringDecodeEntities(ctxt, ent->content,
153 XML_SUBSTITUTE_REF, 0, 0, 0);
154 --ctxt->depth;
155 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156 ent->content[0] = 0;
157 }
158
159 diff = ctxt->nbentities - oldnbent + 1;
160 if (diff > INT_MAX / 2)
161 diff = INT_MAX / 2;
162 ent->checked = diff * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
170
171 /*
172 * Prevent entity exponential check, not just replacement while
173 * parsing the DTD
174 * The check is potentially costly so do that only once in a thousand
175 */
176 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177 (ctxt->nbentities % 1024 == 0)) {
178 for (i = 0;i < ctxt->inputNr;i++) {
179 consumed += ctxt->inputTab[i]->consumed +
180 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
181 }
182 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
183 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
184 ctxt->instate = XML_PARSER_EOF;
185 return (1);
186 }
187 consumed = 0;
188 }
189
190
191
192 if (replacement != 0) {
193 if (replacement < XML_MAX_TEXT_LENGTH)
194 return(0);
195
196 /*
197 * If the volume of entity copy reaches 10 times the
198 * amount of parsed data and over the large text threshold
199 * then that's very likely to be an abuse.
200 */
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
204 }
205 consumed += ctxt->sizeentities;
206
207 if (replacement < XML_PARSER_NON_LINEAR * consumed)
208 return(0);
209 } else if (size != 0) {
210 /*
211 * Do the check based on the replacement size of the entity
212 */
213 if (size < XML_PARSER_BIG_ENTITY)
214 return(0);
215
216 /*
217 * A limit on the amount of text data reasonably used
218 */
219 if (ctxt->input != NULL) {
220 consumed = ctxt->input->consumed +
221 (ctxt->input->cur - ctxt->input->base);
222 }
223 consumed += ctxt->sizeentities;
224
225 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
226 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
227 return (0);
228 } else if (ent != NULL) {
229 /*
230 * use the number of parsed entities in the replacement
231 */
232 size = ent->checked / 2;
233
234 /*
235 * The amount of data parsed counting entities size only once
236 */
237 if (ctxt->input != NULL) {
238 consumed = ctxt->input->consumed +
239 (ctxt->input->cur - ctxt->input->base);
240 }
241 consumed += ctxt->sizeentities;
242
243 /*
244 * Check the density of entities for the amount of data
245 * knowing an entity reference will take at least 3 bytes
246 */
247 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
248 return (0);
249 } else {
250 /*
251 * strange we got no data for checking
252 */
253 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
254 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
255 (ctxt->nbentities <= 10000))
256 return (0);
257 }
258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
259 return (1);
260 }
261
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
271
272
273
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
289
/*
 * NULL-terminated list of the "xml"-prefixed PI targets allowed by the
 * W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
299
300
301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
302 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
303 const xmlChar **str);
304
305 static xmlParserErrors
306 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
307 xmlSAXHandlerPtr sax,
308 void *user_data, int depth, const xmlChar *URL,
309 const xmlChar *ID, xmlNodePtr *list);
310
311 static int
312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
313 const char *encoding);
314 #ifdef LIBXML_LEGACY_ENABLED
315 static void
316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317 xmlNodePtr lastNode);
318 #endif /* LIBXML_LEGACY_ENABLED */
319
320 static xmlParserErrors
321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
322 const xmlChar *string, void *user_data, xmlNodePtr *lst);
323
324 static int
325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
326
327 /************************************************************************
328 * *
329 * Some factorized error routines *
330 * *
331 ************************************************************************/
332
333 /**
334 * xmlErrAttributeDup:
335 * @ctxt: an XML parser context
336 * @prefix: the attribute prefix
337 * @localname: the attribute localname
338 *
339 * Handle a redefinition of attribute error
340 */
341 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
343 const xmlChar * localname)
344 {
345 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346 (ctxt->instate == XML_PARSER_EOF))
347 return;
348 if (ctxt != NULL)
349 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
350
351 if (prefix == NULL)
352 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
353 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
354 (const char *) localname, NULL, NULL, 0, 0,
355 "Attribute %s redefined\n", localname);
356 else
357 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
358 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
359 (const char *) prefix, (const char *) localname,
360 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361 localname);
362 if (ctxt != NULL) {
363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366 }
367 }
368
369 /**
370 * xmlFatalErr:
371 * @ctxt: an XML parser context
372 * @error: the error number
373 * @extra: extra information string
374 *
375 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
376 */
377 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)378 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
379 {
380 const char *errmsg;
381
382 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383 (ctxt->instate == XML_PARSER_EOF))
384 return;
385 switch (error) {
386 case XML_ERR_INVALID_HEX_CHARREF:
387 errmsg = "CharRef: invalid hexadecimal value";
388 break;
389 case XML_ERR_INVALID_DEC_CHARREF:
390 errmsg = "CharRef: invalid decimal value";
391 break;
392 case XML_ERR_INVALID_CHARREF:
393 errmsg = "CharRef: invalid value";
394 break;
395 case XML_ERR_INTERNAL_ERROR:
396 errmsg = "internal error";
397 break;
398 case XML_ERR_PEREF_AT_EOF:
399 errmsg = "PEReference at end of document";
400 break;
401 case XML_ERR_PEREF_IN_PROLOG:
402 errmsg = "PEReference in prolog";
403 break;
404 case XML_ERR_PEREF_IN_EPILOG:
405 errmsg = "PEReference in epilog";
406 break;
407 case XML_ERR_PEREF_NO_NAME:
408 errmsg = "PEReference: no name";
409 break;
410 case XML_ERR_PEREF_SEMICOL_MISSING:
411 errmsg = "PEReference: expecting ';'";
412 break;
413 case XML_ERR_ENTITY_LOOP:
414 errmsg = "Detected an entity reference loop";
415 break;
416 case XML_ERR_ENTITY_NOT_STARTED:
417 errmsg = "EntityValue: \" or ' expected";
418 break;
419 case XML_ERR_ENTITY_PE_INTERNAL:
420 errmsg = "PEReferences forbidden in internal subset";
421 break;
422 case XML_ERR_ENTITY_NOT_FINISHED:
423 errmsg = "EntityValue: \" or ' expected";
424 break;
425 case XML_ERR_ATTRIBUTE_NOT_STARTED:
426 errmsg = "AttValue: \" or ' expected";
427 break;
428 case XML_ERR_LT_IN_ATTRIBUTE:
429 errmsg = "Unescaped '<' not allowed in attributes values";
430 break;
431 case XML_ERR_LITERAL_NOT_STARTED:
432 errmsg = "SystemLiteral \" or ' expected";
433 break;
434 case XML_ERR_LITERAL_NOT_FINISHED:
435 errmsg = "Unfinished System or Public ID \" or ' expected";
436 break;
437 case XML_ERR_MISPLACED_CDATA_END:
438 errmsg = "Sequence ']]>' not allowed in content";
439 break;
440 case XML_ERR_URI_REQUIRED:
441 errmsg = "SYSTEM or PUBLIC, the URI is missing";
442 break;
443 case XML_ERR_PUBID_REQUIRED:
444 errmsg = "PUBLIC, the Public Identifier is missing";
445 break;
446 case XML_ERR_HYPHEN_IN_COMMENT:
447 errmsg = "Comment must not contain '--' (double-hyphen)";
448 break;
449 case XML_ERR_PI_NOT_STARTED:
450 errmsg = "xmlParsePI : no target name";
451 break;
452 case XML_ERR_RESERVED_XML_NAME:
453 errmsg = "Invalid PI name";
454 break;
455 case XML_ERR_NOTATION_NOT_STARTED:
456 errmsg = "NOTATION: Name expected here";
457 break;
458 case XML_ERR_NOTATION_NOT_FINISHED:
459 errmsg = "'>' required to close NOTATION declaration";
460 break;
461 case XML_ERR_VALUE_REQUIRED:
462 errmsg = "Entity value required";
463 break;
464 case XML_ERR_URI_FRAGMENT:
465 errmsg = "Fragment not allowed";
466 break;
467 case XML_ERR_ATTLIST_NOT_STARTED:
468 errmsg = "'(' required to start ATTLIST enumeration";
469 break;
470 case XML_ERR_NMTOKEN_REQUIRED:
471 errmsg = "NmToken expected in ATTLIST enumeration";
472 break;
473 case XML_ERR_ATTLIST_NOT_FINISHED:
474 errmsg = "')' required to finish ATTLIST enumeration";
475 break;
476 case XML_ERR_MIXED_NOT_STARTED:
477 errmsg = "MixedContentDecl : '|' or ')*' expected";
478 break;
479 case XML_ERR_PCDATA_REQUIRED:
480 errmsg = "MixedContentDecl : '#PCDATA' expected";
481 break;
482 case XML_ERR_ELEMCONTENT_NOT_STARTED:
483 errmsg = "ContentDecl : Name or '(' expected";
484 break;
485 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
486 errmsg = "ContentDecl : ',' '|' or ')' expected";
487 break;
488 case XML_ERR_PEREF_IN_INT_SUBSET:
489 errmsg =
490 "PEReference: forbidden within markup decl in internal subset";
491 break;
492 case XML_ERR_GT_REQUIRED:
493 errmsg = "expected '>'";
494 break;
495 case XML_ERR_CONDSEC_INVALID:
496 errmsg = "XML conditional section '[' expected";
497 break;
498 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
499 errmsg = "Content error in the external subset";
500 break;
501 case XML_ERR_CONDSEC_INVALID_KEYWORD:
502 errmsg =
503 "conditional section INCLUDE or IGNORE keyword expected";
504 break;
505 case XML_ERR_CONDSEC_NOT_FINISHED:
506 errmsg = "XML conditional section not closed";
507 break;
508 case XML_ERR_XMLDECL_NOT_STARTED:
509 errmsg = "Text declaration '<?xml' required";
510 break;
511 case XML_ERR_XMLDECL_NOT_FINISHED:
512 errmsg = "parsing XML declaration: '?>' expected";
513 break;
514 case XML_ERR_EXT_ENTITY_STANDALONE:
515 errmsg = "external parsed entities cannot be standalone";
516 break;
517 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
518 errmsg = "EntityRef: expecting ';'";
519 break;
520 case XML_ERR_DOCTYPE_NOT_FINISHED:
521 errmsg = "DOCTYPE improperly terminated";
522 break;
523 case XML_ERR_LTSLASH_REQUIRED:
524 errmsg = "EndTag: '</' not found";
525 break;
526 case XML_ERR_EQUAL_REQUIRED:
527 errmsg = "expected '='";
528 break;
529 case XML_ERR_STRING_NOT_CLOSED:
530 errmsg = "String not closed expecting \" or '";
531 break;
532 case XML_ERR_STRING_NOT_STARTED:
533 errmsg = "String not started expecting ' or \"";
534 break;
535 case XML_ERR_ENCODING_NAME:
536 errmsg = "Invalid XML encoding name";
537 break;
538 case XML_ERR_STANDALONE_VALUE:
539 errmsg = "standalone accepts only 'yes' or 'no'";
540 break;
541 case XML_ERR_DOCUMENT_EMPTY:
542 errmsg = "Document is empty";
543 break;
544 case XML_ERR_DOCUMENT_END:
545 errmsg = "Extra content at the end of the document";
546 break;
547 case XML_ERR_NOT_WELL_BALANCED:
548 errmsg = "chunk is not well balanced";
549 break;
550 case XML_ERR_EXTRA_CONTENT:
551 errmsg = "extra content at the end of well balanced chunk";
552 break;
553 case XML_ERR_VERSION_MISSING:
554 errmsg = "Malformed declaration expecting version";
555 break;
556 case XML_ERR_NAME_TOO_LONG:
557 errmsg = "Name too long";
558 break;
559 #if 0
560 case:
561 errmsg = "";
562 break;
563 #endif
564 default:
565 errmsg = "Unregistered error message";
566 }
567 if (ctxt != NULL)
568 ctxt->errNo = error;
569 if (info == NULL) {
570 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572 errmsg);
573 } else {
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576 errmsg, info);
577 }
578 if (ctxt != NULL) {
579 ctxt->wellFormed = 0;
580 if (ctxt->recovery == 0)
581 ctxt->disableSAX = 1;
582 }
583 }
584
585 /**
586 * xmlFatalErrMsg:
587 * @ctxt: an XML parser context
588 * @error: the error number
589 * @msg: the error message
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg)
596 {
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604 if (ctxt != NULL) {
605 ctxt->wellFormed = 0;
606 if (ctxt->recovery == 0)
607 ctxt->disableSAX = 1;
608 }
609 }
610
611 /**
612 * xmlWarningMsg:
613 * @ctxt: an XML parser context
614 * @error: the error number
615 * @msg: the error message
616 * @str1: extra data
617 * @str2: extra data
618 *
619 * Handle a warning.
620 */
621 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623 const char *msg, const xmlChar *str1, const xmlChar *str2)
624 {
625 xmlStructuredErrorFunc schannel = NULL;
626
627 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628 (ctxt->instate == XML_PARSER_EOF))
629 return;
630 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631 (ctxt->sax->initialized == XML_SAX2_MAGIC))
632 schannel = ctxt->sax->serror;
633 if (ctxt != NULL) {
634 __xmlRaiseError(schannel,
635 (ctxt->sax) ? ctxt->sax->warning : NULL,
636 ctxt->userData,
637 ctxt, NULL, XML_FROM_PARSER, error,
638 XML_ERR_WARNING, NULL, 0,
639 (const char *) str1, (const char *) str2, NULL, 0, 0,
640 msg, (const char *) str1, (const char *) str2);
641 } else {
642 __xmlRaiseError(schannel, NULL, NULL,
643 ctxt, NULL, XML_FROM_PARSER, error,
644 XML_ERR_WARNING, NULL, 0,
645 (const char *) str1, (const char *) str2, NULL, 0, 0,
646 msg, (const char *) str1, (const char *) str2);
647 }
648 }
649
650 /**
651 * xmlValidityError:
652 * @ctxt: an XML parser context
653 * @error: the error number
654 * @msg: the error message
655 * @str1: extra data
656 *
657 * Handle a validity error.
658 */
659 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)660 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661 const char *msg, const xmlChar *str1, const xmlChar *str2)
662 {
663 xmlStructuredErrorFunc schannel = NULL;
664
665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666 (ctxt->instate == XML_PARSER_EOF))
667 return;
668 if (ctxt != NULL) {
669 ctxt->errNo = error;
670 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671 schannel = ctxt->sax->serror;
672 }
673 if (ctxt != NULL) {
674 __xmlRaiseError(schannel,
675 ctxt->vctxt.error, ctxt->vctxt.userData,
676 ctxt, NULL, XML_FROM_DTD, error,
677 XML_ERR_ERROR, NULL, 0, (const char *) str1,
678 (const char *) str2, NULL, 0, 0,
679 msg, (const char *) str1, (const char *) str2);
680 ctxt->valid = 0;
681 } else {
682 __xmlRaiseError(schannel, NULL, NULL,
683 ctxt, NULL, XML_FROM_DTD, error,
684 XML_ERR_ERROR, NULL, 0, (const char *) str1,
685 (const char *) str2, NULL, 0, 0,
686 msg, (const char *) str1, (const char *) str2);
687 }
688 }
689
690 /**
691 * xmlFatalErrMsgInt:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @val: an integer value
696 *
697 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
698 */
699 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701 const char *msg, int val)
702 {
703 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704 (ctxt->instate == XML_PARSER_EOF))
705 return;
706 if (ctxt != NULL)
707 ctxt->errNo = error;
708 __xmlRaiseError(NULL, NULL, NULL,
709 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
710 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711 if (ctxt != NULL) {
712 ctxt->wellFormed = 0;
713 if (ctxt->recovery == 0)
714 ctxt->disableSAX = 1;
715 }
716 }
717
718 /**
719 * xmlFatalErrMsgStrIntStr:
720 * @ctxt: an XML parser context
721 * @error: the error number
722 * @msg: the error message
723 * @str1: an string info
724 * @val: an integer value
725 * @str2: an string info
726 *
727 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
728 */
729 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731 const char *msg, const xmlChar *str1, int val,
732 const xmlChar *str2)
733 {
734 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735 (ctxt->instate == XML_PARSER_EOF))
736 return;
737 if (ctxt != NULL)
738 ctxt->errNo = error;
739 __xmlRaiseError(NULL, NULL, NULL,
740 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
741 NULL, 0, (const char *) str1, (const char *) str2,
742 NULL, val, 0, msg, str1, val, str2);
743 if (ctxt != NULL) {
744 ctxt->wellFormed = 0;
745 if (ctxt->recovery == 0)
746 ctxt->disableSAX = 1;
747 }
748 }
749
750 /**
751 * xmlFatalErrMsgStr:
752 * @ctxt: an XML parser context
753 * @error: the error number
754 * @msg: the error message
755 * @val: a string value
756 *
757 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
758 */
759 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761 const char *msg, const xmlChar * val)
762 {
763 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764 (ctxt->instate == XML_PARSER_EOF))
765 return;
766 if (ctxt != NULL)
767 ctxt->errNo = error;
768 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
769 XML_FROM_PARSER, error, XML_ERR_FATAL,
770 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771 val);
772 if (ctxt != NULL) {
773 ctxt->wellFormed = 0;
774 if (ctxt->recovery == 0)
775 ctxt->disableSAX = 1;
776 }
777 }
778
779 /**
780 * xmlErrMsgStr:
781 * @ctxt: an XML parser context
782 * @error: the error number
783 * @msg: the error message
784 * @val: a string value
785 *
786 * Handle a non fatal parser error
787 */
788 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)789 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790 const char *msg, const xmlChar * val)
791 {
792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
795 if (ctxt != NULL)
796 ctxt->errNo = error;
797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
798 XML_FROM_PARSER, error, XML_ERR_ERROR,
799 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800 val);
801 }
802
803 /**
804 * xmlNsErr:
805 * @ctxt: an XML parser context
806 * @error: the error number
807 * @msg: the message
808 * @info1: extra information string
809 * @info2: extra information string
810 *
811 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
812 */
813 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)814 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
815 const char *msg,
816 const xmlChar * info1, const xmlChar * info2,
817 const xmlChar * info3)
818 {
819 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820 (ctxt->instate == XML_PARSER_EOF))
821 return;
822 if (ctxt != NULL)
823 ctxt->errNo = error;
824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_ERROR, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
828 if (ctxt != NULL)
829 ctxt->nsWellFormed = 0;
830 }
831
832 /**
833 * xmlNsWarn
834 * @ctxt: an XML parser context
835 * @error: the error number
836 * @msg: the message
837 * @info1: extra information string
838 * @info2: extra information string
839 *
840 * Handle a namespace warning error
841 */
842 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)843 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
844 const char *msg,
845 const xmlChar * info1, const xmlChar * info2,
846 const xmlChar * info3)
847 {
848 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849 (ctxt->instate == XML_PARSER_EOF))
850 return;
851 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852 XML_ERR_WARNING, NULL, 0, (const char *) info1,
853 (const char *) info2, (const char *) info3, 0, 0, msg,
854 info1, info2, info3);
855 }
856
857 /************************************************************************
858 * *
859 * Library wide options *
860 * *
861 ************************************************************************/
862
863 /**
864 * xmlHasFeature:
865 * @feature: the feature to be examined
866 *
867 * Examines if the library has been compiled with a given feature.
868 *
869 * Returns a non-zero value if the feature exist, otherwise zero.
870 * Returns zero (0) if the feature does not exist or an unknown
871 * unknown feature is requested, non-zero otherwise.
872 */
873 int
xmlHasFeature(xmlFeature feature)874 xmlHasFeature(xmlFeature feature)
875 {
876 switch (feature) {
877 case XML_WITH_THREAD:
878 #ifdef LIBXML_THREAD_ENABLED
879 return(1);
880 #else
881 return(0);
882 #endif
883 case XML_WITH_TREE:
884 #ifdef LIBXML_TREE_ENABLED
885 return(1);
886 #else
887 return(0);
888 #endif
889 case XML_WITH_OUTPUT:
890 #ifdef LIBXML_OUTPUT_ENABLED
891 return(1);
892 #else
893 return(0);
894 #endif
895 case XML_WITH_PUSH:
896 #ifdef LIBXML_PUSH_ENABLED
897 return(1);
898 #else
899 return(0);
900 #endif
901 case XML_WITH_READER:
902 #ifdef LIBXML_READER_ENABLED
903 return(1);
904 #else
905 return(0);
906 #endif
907 case XML_WITH_PATTERN:
908 #ifdef LIBXML_PATTERN_ENABLED
909 return(1);
910 #else
911 return(0);
912 #endif
913 case XML_WITH_WRITER:
914 #ifdef LIBXML_WRITER_ENABLED
915 return(1);
916 #else
917 return(0);
918 #endif
919 case XML_WITH_SAX1:
920 #ifdef LIBXML_SAX1_ENABLED
921 return(1);
922 #else
923 return(0);
924 #endif
925 case XML_WITH_FTP:
926 #ifdef LIBXML_FTP_ENABLED
927 return(1);
928 #else
929 return(0);
930 #endif
931 case XML_WITH_HTTP:
932 #ifdef LIBXML_HTTP_ENABLED
933 return(1);
934 #else
935 return(0);
936 #endif
937 case XML_WITH_VALID:
938 #ifdef LIBXML_VALID_ENABLED
939 return(1);
940 #else
941 return(0);
942 #endif
943 case XML_WITH_HTML:
944 #ifdef LIBXML_HTML_ENABLED
945 return(1);
946 #else
947 return(0);
948 #endif
949 case XML_WITH_LEGACY:
950 #ifdef LIBXML_LEGACY_ENABLED
951 return(1);
952 #else
953 return(0);
954 #endif
955 case XML_WITH_C14N:
956 #ifdef LIBXML_C14N_ENABLED
957 return(1);
958 #else
959 return(0);
960 #endif
961 case XML_WITH_CATALOG:
962 #ifdef LIBXML_CATALOG_ENABLED
963 return(1);
964 #else
965 return(0);
966 #endif
967 case XML_WITH_XPATH:
968 #ifdef LIBXML_XPATH_ENABLED
969 return(1);
970 #else
971 return(0);
972 #endif
973 case XML_WITH_XPTR:
974 #ifdef LIBXML_XPTR_ENABLED
975 return(1);
976 #else
977 return(0);
978 #endif
979 case XML_WITH_XINCLUDE:
980 #ifdef LIBXML_XINCLUDE_ENABLED
981 return(1);
982 #else
983 return(0);
984 #endif
985 case XML_WITH_ICONV:
986 #ifdef LIBXML_ICONV_ENABLED
987 return(1);
988 #else
989 return(0);
990 #endif
991 case XML_WITH_ISO8859X:
992 #ifdef LIBXML_ISO8859X_ENABLED
993 return(1);
994 #else
995 return(0);
996 #endif
997 case XML_WITH_UNICODE:
998 #ifdef LIBXML_UNICODE_ENABLED
999 return(1);
1000 #else
1001 return(0);
1002 #endif
1003 case XML_WITH_REGEXP:
1004 #ifdef LIBXML_REGEXP_ENABLED
1005 return(1);
1006 #else
1007 return(0);
1008 #endif
1009 case XML_WITH_AUTOMATA:
1010 #ifdef LIBXML_AUTOMATA_ENABLED
1011 return(1);
1012 #else
1013 return(0);
1014 #endif
1015 case XML_WITH_EXPR:
1016 #ifdef LIBXML_EXPR_ENABLED
1017 return(1);
1018 #else
1019 return(0);
1020 #endif
1021 case XML_WITH_SCHEMAS:
1022 #ifdef LIBXML_SCHEMAS_ENABLED
1023 return(1);
1024 #else
1025 return(0);
1026 #endif
1027 case XML_WITH_SCHEMATRON:
1028 #ifdef LIBXML_SCHEMATRON_ENABLED
1029 return(1);
1030 #else
1031 return(0);
1032 #endif
1033 case XML_WITH_MODULES:
1034 #ifdef LIBXML_MODULES_ENABLED
1035 return(1);
1036 #else
1037 return(0);
1038 #endif
1039 case XML_WITH_DEBUG:
1040 #ifdef LIBXML_DEBUG_ENABLED
1041 return(1);
1042 #else
1043 return(0);
1044 #endif
1045 case XML_WITH_DEBUG_MEM:
1046 #ifdef DEBUG_MEMORY_LOCATION
1047 return(1);
1048 #else
1049 return(0);
1050 #endif
1051 case XML_WITH_DEBUG_RUN:
1052 #ifdef LIBXML_DEBUG_RUNTIME
1053 return(1);
1054 #else
1055 return(0);
1056 #endif
1057 case XML_WITH_ZLIB:
1058 #ifdef LIBXML_ZLIB_ENABLED
1059 return(1);
1060 #else
1061 return(0);
1062 #endif
1063 case XML_WITH_LZMA:
1064 #ifdef LIBXML_LZMA_ENABLED
1065 return(1);
1066 #else
1067 return(0);
1068 #endif
1069 case XML_WITH_ICU:
1070 #ifdef LIBXML_ICU_ENABLED
1071 return(1);
1072 #else
1073 return(0);
1074 #endif
1075 default:
1076 break;
1077 }
1078 return(0);
1079 }
1080
1081 /************************************************************************
1082 * *
1083 * SAX2 defaulted attributes handling *
1084 * *
1085 ************************************************************************/
1086
1087 /**
1088 * xmlDetectSAX2:
1089 * @ctxt: an XML parser context
1090 *
1091 * Do the SAX2 detection and specific initialization
1092 */
1093 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1094 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1095 xmlSAXHandlerPtr sax;
1096
1097 /* Avoid unused variable warning if features are disabled. */
1098 (void) sax;
1099
1100 if (ctxt == NULL) return;
1101 sax = ctxt->sax;
1102 #ifdef LIBXML_SAX1_ENABLED
1103 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1104 ((sax->startElementNs != NULL) ||
1105 (sax->endElementNs != NULL) ||
1106 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107 ctxt->sax2 = 1;
1108 #else
1109 ctxt->sax2 = 1;
1110 #endif /* LIBXML_SAX1_ENABLED */
1111
1112 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116 (ctxt->str_xml_ns == NULL)) {
1117 xmlErrMemory(ctxt, NULL);
1118 }
1119 }
1120
/*
 * Table of the defaulted attributes recorded for one element.
 * Each attribute uses five consecutive entries of @values: local name,
 * prefix, value, pointer to the end of the value, and an "external"
 * marker string (NULL when the marker is not set).
 * nbAttrs and maxAttrs are counted in attributes, not in array entries.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;        /* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    /* Pre-C99 fallback: the real storage is allocated past the struct. */
    const xmlChar *values[5];
#endif
};
1133
1134 /**
1135 * xmlAttrNormalizeSpace:
1136 * @src: the source string
1137 * @dst: the target string
1138 *
1139 * Normalize the space in non CDATA attribute values:
1140 * If the attribute type is not CDATA, then the XML processor MUST further
1141 * process the normalized attribute value by discarding any leading and
1142 * trailing space (#x20) characters, and by replacing sequences of space
1143 * (#x20) characters by a single space (#x20) character.
1144 * Note that the size of dst need to be at least src, and if one doesn't need
1145 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1146 * passing src as dst is just fine.
1147 *
1148 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1149 * is needed.
1150 */
1151 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1152 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1153 {
1154 if ((src == NULL) || (dst == NULL))
1155 return(NULL);
1156
1157 while (*src == 0x20) src++;
1158 while (*src != 0) {
1159 if (*src == 0x20) {
1160 while (*src == 0x20) src++;
1161 if (*src != 0)
1162 *dst++ = 0x20;
1163 } else {
1164 *dst++ = *src++;
1165 }
1166 }
1167 *dst = 0;
1168 if (dst == src)
1169 return(NULL);
1170 return(dst);
1171 }
1172
1173 /**
1174 * xmlAttrNormalizeSpace2:
1175 * @src: the source string
1176 *
1177 * Normalize the space in non CDATA attribute values, a slightly more complex
1178 * front end to avoid allocation problems when running on attribute values
1179 * coming from the input.
1180 *
1181 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1182 * is needed.
1183 */
1184 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1185 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1186 {
1187 int i;
1188 int remove_head = 0;
1189 int need_realloc = 0;
1190 const xmlChar *cur;
1191
1192 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193 return(NULL);
1194 i = *len;
1195 if (i <= 0)
1196 return(NULL);
1197
1198 cur = src;
1199 while (*cur == 0x20) {
1200 cur++;
1201 remove_head++;
1202 }
1203 while (*cur != 0) {
1204 if (*cur == 0x20) {
1205 cur++;
1206 if ((*cur == 0x20) || (*cur == 0)) {
1207 need_realloc = 1;
1208 break;
1209 }
1210 } else
1211 cur++;
1212 }
1213 if (need_realloc) {
1214 xmlChar *ret;
1215
1216 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217 if (ret == NULL) {
1218 xmlErrMemory(ctxt, NULL);
1219 return(NULL);
1220 }
1221 xmlAttrNormalizeSpace(ret, ret);
1222 *len = (int) strlen((const char *)ret);
1223 return(ret);
1224 } else if (remove_head) {
1225 *len -= remove_head;
1226 memmove(src, src + remove_head, 1 + *len);
1227 return(src);
1228 }
1229 return(NULL);
1230 }
1231
1232 /**
1233 * xmlAddDefAttrs:
1234 * @ctxt: an XML parser context
1235 * @fullname: the element fullname
1236 * @fullattr: the attribute fullname
1237 * @value: the attribute value
1238 *
1239 * Add a defaulted attribute for an element
1240 */
1241 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1242 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1243 const xmlChar *fullname,
1244 const xmlChar *fullattr,
1245 const xmlChar *value) {
1246 xmlDefAttrsPtr defaults;
1247 int len;
1248 const xmlChar *name;
1249 const xmlChar *prefix;
1250
1251 /*
1252 * Allows to detect attribute redefinitions
1253 */
1254 if (ctxt->attsSpecial != NULL) {
1255 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256 return;
1257 }
1258
1259 if (ctxt->attsDefault == NULL) {
1260 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261 if (ctxt->attsDefault == NULL)
1262 goto mem_error;
1263 }
1264
1265 /*
1266 * split the element name into prefix:localname , the string found
1267 * are within the DTD and then not associated to namespace names.
1268 */
1269 name = xmlSplitQName3(fullname, &len);
1270 if (name == NULL) {
1271 name = xmlDictLookup(ctxt->dict, fullname, -1);
1272 prefix = NULL;
1273 } else {
1274 name = xmlDictLookup(ctxt->dict, name, -1);
1275 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1276 }
1277
1278 /*
1279 * make sure there is some storage
1280 */
1281 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282 if (defaults == NULL) {
1283 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1284 (4 * 5) * sizeof(const xmlChar *));
1285 if (defaults == NULL)
1286 goto mem_error;
1287 defaults->nbAttrs = 0;
1288 defaults->maxAttrs = 4;
1289 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290 defaults, NULL) < 0) {
1291 xmlFree(defaults);
1292 goto mem_error;
1293 }
1294 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1295 xmlDefAttrsPtr temp;
1296
1297 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1298 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299 if (temp == NULL)
1300 goto mem_error;
1301 defaults = temp;
1302 defaults->maxAttrs *= 2;
1303 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304 defaults, NULL) < 0) {
1305 xmlFree(defaults);
1306 goto mem_error;
1307 }
1308 }
1309
1310 /*
1311 * Split the element name into prefix:localname , the string found
1312 * are within the DTD and hen not associated to namespace names.
1313 */
1314 name = xmlSplitQName3(fullattr, &len);
1315 if (name == NULL) {
1316 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317 prefix = NULL;
1318 } else {
1319 name = xmlDictLookup(ctxt->dict, name, -1);
1320 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321 }
1322
1323 defaults->values[5 * defaults->nbAttrs] = name;
1324 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325 /* intern the string and precompute the end */
1326 len = xmlStrlen(value);
1327 value = xmlDictLookup(ctxt->dict, value, len);
1328 defaults->values[5 * defaults->nbAttrs + 2] = value;
1329 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330 if (ctxt->external)
1331 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332 else
1333 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334 defaults->nbAttrs++;
1335
1336 return;
1337
1338 mem_error:
1339 xmlErrMemory(ctxt, NULL);
1340 return;
1341 }
1342
1343 /**
1344 * xmlAddSpecialAttr:
1345 * @ctxt: an XML parser context
1346 * @fullname: the element fullname
1347 * @fullattr: the attribute fullname
1348 * @type: the attribute type
1349 *
1350 * Register this attribute type
1351 */
1352 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1353 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1354 const xmlChar *fullname,
1355 const xmlChar *fullattr,
1356 int type)
1357 {
1358 if (ctxt->attsSpecial == NULL) {
1359 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360 if (ctxt->attsSpecial == NULL)
1361 goto mem_error;
1362 }
1363
1364 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365 return;
1366
1367 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368 (void *) (ptrdiff_t) type);
1369 return;
1370
1371 mem_error:
1372 xmlErrMemory(ctxt, NULL);
1373 return;
1374 }
1375
1376 /**
1377 * xmlCleanSpecialAttrCallback:
1378 *
1379 * Removes CDATA attributes from the special attribute table
1380 */
1381 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1382 xmlCleanSpecialAttrCallback(void *payload, void *data,
1383 const xmlChar *fullname, const xmlChar *fullattr,
1384 const xmlChar *unused ATTRIBUTE_UNUSED) {
1385 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1386
1387 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1389 }
1390 }
1391
1392 /**
1393 * xmlCleanSpecialAttr:
1394 * @ctxt: an XML parser context
1395 *
1396 * Trim the list of attributes defined to remove all those of type
1397 * CDATA as they are not special. This call should be done when finishing
1398 * to parse the DTD and before starting to parse the document root.
1399 */
1400 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1401 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1402 {
1403 if (ctxt->attsSpecial == NULL)
1404 return;
1405
1406 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1407
1408 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1409 xmlHashFree(ctxt->attsSpecial, NULL);
1410 ctxt->attsSpecial = NULL;
1411 }
1412 return;
1413 }
1414
1415 /**
1416 * xmlCheckLanguageID:
1417 * @lang: pointer to the string value
1418 *
1419 * Checks that the value conforms to the LanguageID production:
1420 *
1421 * NOTE: this is somewhat deprecated, those productions were removed from
1422 * the XML Second edition.
1423 *
1424 * [33] LanguageID ::= Langcode ('-' Subcode)*
1425 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1426 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1427 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1428 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1429 * [38] Subcode ::= ([a-z] | [A-Z])+
1430 *
1431 * The current REC reference the successors of RFC 1766, currently 5646
1432 *
1433 * http://www.rfc-editor.org/rfc/rfc5646.txt
1434 * langtag = language
1435 * ["-" script]
1436 * ["-" region]
1437 * *("-" variant)
1438 * *("-" extension)
1439 * ["-" privateuse]
1440 * language = 2*3ALPHA ; shortest ISO 639 code
1441 * ["-" extlang] ; sometimes followed by
1442 * ; extended language subtags
1443 * / 4ALPHA ; or reserved for future use
1444 * / 5*8ALPHA ; or registered language subtag
1445 *
1446 * extlang = 3ALPHA ; selected ISO 639 codes
1447 * *2("-" 3ALPHA) ; permanently reserved
1448 *
1449 * script = 4ALPHA ; ISO 15924 code
1450 *
1451 * region = 2ALPHA ; ISO 3166-1 code
1452 * / 3DIGIT ; UN M.49 code
1453 *
1454 * variant = 5*8alphanum ; registered variants
1455 * / (DIGIT 3alphanum)
1456 *
1457 * extension = singleton 1*("-" (2*8alphanum))
1458 *
1459 * ; Single alphanumerics
1460 * ; "x" reserved for private use
1461 * singleton = DIGIT ; 0 - 9
1462 * / %x41-57 ; A - W
1463 * / %x59-5A ; Y - Z
1464 * / %x61-77 ; a - w
1465 * / %x79-7A ; y - z
1466 *
1467 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1468 * The parser below doesn't try to cope with extension or privateuse
1469 * that could be added but that's not interoperable anyway
1470 *
1471 * Returns 1 if correct 0 otherwise
1472 **/
1473 int
xmlCheckLanguageID(const xmlChar * lang)1474 xmlCheckLanguageID(const xmlChar * lang)
1475 {
1476 const xmlChar *cur = lang, *nxt;
1477
1478 if (cur == NULL)
1479 return (0);
1480 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481 ((cur[0] == 'I') && (cur[1] == '-')) ||
1482 ((cur[0] == 'x') && (cur[1] == '-')) ||
1483 ((cur[0] == 'X') && (cur[1] == '-'))) {
1484 /*
1485 * Still allow IANA code and user code which were coming
1486 * from the previous version of the XML-1.0 specification
1487 * it's deprecated but we should not fail
1488 */
1489 cur += 2;
1490 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492 cur++;
1493 return(cur[0] == 0);
1494 }
1495 nxt = cur;
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur >= 4) {
1500 /*
1501 * Reserved
1502 */
1503 if ((nxt - cur > 8) || (nxt[0] != 0))
1504 return(0);
1505 return(1);
1506 }
1507 if (nxt - cur < 2)
1508 return(0);
1509 /* we got an ISO 639 code */
1510 if (nxt[0] == 0)
1511 return(1);
1512 if (nxt[0] != '-')
1513 return(0);
1514
1515 nxt++;
1516 cur = nxt;
1517 /* now we can have extlang or script or region or variant */
1518 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519 goto region_m49;
1520
1521 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523 nxt++;
1524 if (nxt - cur == 4)
1525 goto script;
1526 if (nxt - cur == 2)
1527 goto region;
1528 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529 goto variant;
1530 if (nxt - cur != 3)
1531 return(0);
1532 /* we parsed an extlang */
1533 if (nxt[0] == 0)
1534 return(1);
1535 if (nxt[0] != '-')
1536 return(0);
1537
1538 nxt++;
1539 cur = nxt;
1540 /* now we can have script or region or variant */
1541 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542 goto region_m49;
1543
1544 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546 nxt++;
1547 if (nxt - cur == 2)
1548 goto region;
1549 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550 goto variant;
1551 if (nxt - cur != 4)
1552 return(0);
1553 /* we parsed a script */
1554 script:
1555 if (nxt[0] == 0)
1556 return(1);
1557 if (nxt[0] != '-')
1558 return(0);
1559
1560 nxt++;
1561 cur = nxt;
1562 /* now we can have region or variant */
1563 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564 goto region_m49;
1565
1566 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568 nxt++;
1569
1570 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571 goto variant;
1572 if (nxt - cur != 2)
1573 return(0);
1574 /* we parsed a region */
1575 region:
1576 if (nxt[0] == 0)
1577 return(1);
1578 if (nxt[0] != '-')
1579 return(0);
1580
1581 nxt++;
1582 cur = nxt;
1583 /* now we can just have a variant */
1584 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586 nxt++;
1587
1588 if ((nxt - cur < 5) || (nxt - cur > 8))
1589 return(0);
1590
1591 /* we parsed a variant */
1592 variant:
1593 if (nxt[0] == 0)
1594 return(1);
1595 if (nxt[0] != '-')
1596 return(0);
1597 /* extensions and private use subtags not checked */
1598 return (1);
1599
1600 region_m49:
1601 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603 nxt += 3;
1604 goto region;
1605 }
1606 return(0);
1607 }
1608
1609 /************************************************************************
1610 * *
1611 * Parser stacks related functions and macros *
1612 * *
1613 ************************************************************************/
1614
1615 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1616 const xmlChar ** str);
1617
1618 #ifdef SAX2
1619 /**
1620 * nsPush:
1621 * @ctxt: an XML parser context
1622 * @prefix: the namespace prefix or NULL
1623 * @URL: the namespace name
1624 *
1625 * Pushes a new parser namespace on top of the ns stack
1626 *
1627 * Returns -1 in case of error, -2 if the namespace should be discarded
1628 * and the index in the stack otherwise.
1629 */
1630 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1631 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1632 {
1633 if (ctxt->options & XML_PARSE_NSCLEAN) {
1634 int i;
1635 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636 if (ctxt->nsTab[i] == prefix) {
1637 /* in scope */
1638 if (ctxt->nsTab[i + 1] == URL)
1639 return(-2);
1640 /* out of scope keep it */
1641 break;
1642 }
1643 }
1644 }
1645 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646 ctxt->nsMax = 10;
1647 ctxt->nsNr = 0;
1648 ctxt->nsTab = (const xmlChar **)
1649 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650 if (ctxt->nsTab == NULL) {
1651 xmlErrMemory(ctxt, NULL);
1652 ctxt->nsMax = 0;
1653 return (-1);
1654 }
1655 } else if (ctxt->nsNr >= ctxt->nsMax) {
1656 const xmlChar ** tmp;
1657 ctxt->nsMax *= 2;
1658 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660 if (tmp == NULL) {
1661 xmlErrMemory(ctxt, NULL);
1662 ctxt->nsMax /= 2;
1663 return (-1);
1664 }
1665 ctxt->nsTab = tmp;
1666 }
1667 ctxt->nsTab[ctxt->nsNr++] = prefix;
1668 ctxt->nsTab[ctxt->nsNr++] = URL;
1669 return (ctxt->nsNr);
1670 }
1671 /**
1672 * nsPop:
1673 * @ctxt: an XML parser context
1674 * @nr: the number to pop
1675 *
1676 * Pops the top @nr parser prefix/namespace from the ns stack
1677 *
1678 * Returns the number of namespaces removed
1679 */
1680 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1681 nsPop(xmlParserCtxtPtr ctxt, int nr)
1682 {
1683 int i;
1684
1685 if (ctxt->nsTab == NULL) return(0);
1686 if (ctxt->nsNr < nr) {
1687 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688 nr = ctxt->nsNr;
1689 }
1690 if (ctxt->nsNr <= 0)
1691 return (0);
1692
1693 for (i = 0;i < nr;i++) {
1694 ctxt->nsNr--;
1695 ctxt->nsTab[ctxt->nsNr] = NULL;
1696 }
1697 return(nr);
1698 }
1699 #endif
1700
1701 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1702 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1703 const xmlChar **atts;
1704 int *attallocs;
1705 int maxatts;
1706
1707 if (ctxt->atts == NULL) {
1708 maxatts = 55; /* allow for 10 attrs by default */
1709 atts = (const xmlChar **)
1710 xmlMalloc(maxatts * sizeof(xmlChar *));
1711 if (atts == NULL) goto mem_error;
1712 ctxt->atts = atts;
1713 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714 if (attallocs == NULL) goto mem_error;
1715 ctxt->attallocs = attallocs;
1716 ctxt->maxatts = maxatts;
1717 } else if (nr + 5 > ctxt->maxatts) {
1718 maxatts = (nr + 5) * 2;
1719 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720 maxatts * sizeof(const xmlChar *));
1721 if (atts == NULL) goto mem_error;
1722 ctxt->atts = atts;
1723 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724 (maxatts / 5) * sizeof(int));
1725 if (attallocs == NULL) goto mem_error;
1726 ctxt->attallocs = attallocs;
1727 ctxt->maxatts = maxatts;
1728 }
1729 return(ctxt->maxatts);
1730 mem_error:
1731 xmlErrMemory(ctxt, NULL);
1732 return(-1);
1733 }
1734
1735 /**
1736 * inputPush:
1737 * @ctxt: an XML parser context
1738 * @value: the parser input
1739 *
1740 * Pushes a new parser input on top of the input stack
1741 *
1742 * Returns -1 in case of error, the index in the stack otherwise
1743 */
1744 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1745 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1746 {
1747 if ((ctxt == NULL) || (value == NULL))
1748 return(-1);
1749 if (ctxt->inputNr >= ctxt->inputMax) {
1750 ctxt->inputMax *= 2;
1751 ctxt->inputTab =
1752 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1753 ctxt->inputMax *
1754 sizeof(ctxt->inputTab[0]));
1755 if (ctxt->inputTab == NULL) {
1756 xmlErrMemory(ctxt, NULL);
1757 ctxt->inputMax /= 2;
1758 return (-1);
1759 }
1760 }
1761 ctxt->inputTab[ctxt->inputNr] = value;
1762 ctxt->input = value;
1763 return (ctxt->inputNr++);
1764 }
1765 /**
1766 * inputPop:
1767 * @ctxt: an XML parser context
1768 *
1769 * Pops the top parser input from the input stack
1770 *
1771 * Returns the input just removed
1772 */
1773 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1774 inputPop(xmlParserCtxtPtr ctxt)
1775 {
1776 xmlParserInputPtr ret;
1777
1778 if (ctxt == NULL)
1779 return(NULL);
1780 if (ctxt->inputNr <= 0)
1781 return (NULL);
1782 ctxt->inputNr--;
1783 if (ctxt->inputNr > 0)
1784 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785 else
1786 ctxt->input = NULL;
1787 ret = ctxt->inputTab[ctxt->inputNr];
1788 ctxt->inputTab[ctxt->inputNr] = NULL;
1789 return (ret);
1790 }
1791 /**
1792 * nodePush:
1793 * @ctxt: an XML parser context
1794 * @value: the element node
1795 *
1796 * Pushes a new element node on top of the node stack
1797 *
1798 * Returns -1 in case of error, the index in the stack otherwise
1799 */
1800 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1801 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1802 {
1803 if (ctxt == NULL) return(0);
1804 if (ctxt->nodeNr >= ctxt->nodeMax) {
1805 xmlNodePtr *tmp;
1806
1807 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808 ctxt->nodeMax * 2 *
1809 sizeof(ctxt->nodeTab[0]));
1810 if (tmp == NULL) {
1811 xmlErrMemory(ctxt, NULL);
1812 return (-1);
1813 }
1814 ctxt->nodeTab = tmp;
1815 ctxt->nodeMax *= 2;
1816 }
1817 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1821 xmlParserMaxDepth);
1822 xmlHaltParser(ctxt);
1823 return(-1);
1824 }
1825 ctxt->nodeTab[ctxt->nodeNr] = value;
1826 ctxt->node = value;
1827 return (ctxt->nodeNr++);
1828 }
1829
1830 /**
1831 * nodePop:
1832 * @ctxt: an XML parser context
1833 *
1834 * Pops the top element node from the node stack
1835 *
1836 * Returns the node just removed
1837 */
1838 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1839 nodePop(xmlParserCtxtPtr ctxt)
1840 {
1841 xmlNodePtr ret;
1842
1843 if (ctxt == NULL) return(NULL);
1844 if (ctxt->nodeNr <= 0)
1845 return (NULL);
1846 ctxt->nodeNr--;
1847 if (ctxt->nodeNr > 0)
1848 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849 else
1850 ctxt->node = NULL;
1851 ret = ctxt->nodeTab[ctxt->nodeNr];
1852 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853 return (ret);
1854 }
1855
1856 /**
1857 * nameNsPush:
1858 * @ctxt: an XML parser context
1859 * @value: the element name
1860 * @prefix: the element prefix
1861 * @URI: the element namespace name
1862 * @line: the current line number for error messages
1863 * @nsNr: the number of namespaces pushed on the namespace table
1864 *
1865 * Pushes a new element name/prefix/URL on top of the name stack
1866 *
1867 * Returns -1 in case of error, the index in the stack otherwise
1868 */
1869 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1870 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1871 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872 {
1873 xmlStartTag *tag;
1874
1875 if (ctxt->nameNr >= ctxt->nameMax) {
1876 const xmlChar * *tmp;
1877 xmlStartTag *tmp2;
1878 ctxt->nameMax *= 2;
1879 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880 ctxt->nameMax *
1881 sizeof(ctxt->nameTab[0]));
1882 if (tmp == NULL) {
1883 ctxt->nameMax /= 2;
1884 goto mem_error;
1885 }
1886 ctxt->nameTab = tmp;
1887 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888 ctxt->nameMax *
1889 sizeof(ctxt->pushTab[0]));
1890 if (tmp2 == NULL) {
1891 ctxt->nameMax /= 2;
1892 goto mem_error;
1893 }
1894 ctxt->pushTab = tmp2;
1895 } else if (ctxt->pushTab == NULL) {
1896 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897 sizeof(ctxt->pushTab[0]));
1898 if (ctxt->pushTab == NULL)
1899 goto mem_error;
1900 }
1901 ctxt->nameTab[ctxt->nameNr] = value;
1902 ctxt->name = value;
1903 tag = &ctxt->pushTab[ctxt->nameNr];
1904 tag->prefix = prefix;
1905 tag->URI = URI;
1906 tag->line = line;
1907 tag->nsNr = nsNr;
1908 return (ctxt->nameNr++);
1909 mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1912 }
1913 #ifdef LIBXML_PUSH_ENABLED
1914 /**
1915 * nameNsPop:
1916 * @ctxt: an XML parser context
1917 *
1918 * Pops the top element/prefix/URI name from the name stack
1919 *
1920 * Returns the name just removed
1921 */
1922 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1923 nameNsPop(xmlParserCtxtPtr ctxt)
1924 {
1925 const xmlChar *ret;
1926
1927 if (ctxt->nameNr <= 0)
1928 return (NULL);
1929 ctxt->nameNr--;
1930 if (ctxt->nameNr > 0)
1931 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1932 else
1933 ctxt->name = NULL;
1934 ret = ctxt->nameTab[ctxt->nameNr];
1935 ctxt->nameTab[ctxt->nameNr] = NULL;
1936 return (ret);
1937 }
1938 #endif /* LIBXML_PUSH_ENABLED */
1939
1940 /**
1941 * namePush:
1942 * @ctxt: an XML parser context
1943 * @value: the element name
1944 *
1945 * Pushes a new element name on top of the name stack
1946 *
1947 * Returns -1 in case of error, the index in the stack otherwise
1948 */
1949 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1950 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1951 {
1952 if (ctxt == NULL) return (-1);
1953
1954 if (ctxt->nameNr >= ctxt->nameMax) {
1955 const xmlChar * *tmp;
1956 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957 ctxt->nameMax * 2 *
1958 sizeof(ctxt->nameTab[0]));
1959 if (tmp == NULL) {
1960 goto mem_error;
1961 }
1962 ctxt->nameTab = tmp;
1963 ctxt->nameMax *= 2;
1964 }
1965 ctxt->nameTab[ctxt->nameNr] = value;
1966 ctxt->name = value;
1967 return (ctxt->nameNr++);
1968 mem_error:
1969 xmlErrMemory(ctxt, NULL);
1970 return (-1);
1971 }
1972 /**
1973 * namePop:
1974 * @ctxt: an XML parser context
1975 *
1976 * Pops the top element name from the name stack
1977 *
1978 * Returns the name just removed
1979 */
1980 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1981 namePop(xmlParserCtxtPtr ctxt)
1982 {
1983 const xmlChar *ret;
1984
1985 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986 return (NULL);
1987 ctxt->nameNr--;
1988 if (ctxt->nameNr > 0)
1989 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990 else
1991 ctxt->name = NULL;
1992 ret = ctxt->nameTab[ctxt->nameNr];
1993 ctxt->nameTab[ctxt->nameNr] = NULL;
1994 return (ret);
1995 }
1996
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack, doubling the stack when full,
 * and makes ctxt->space point to the new top entry.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int newMax = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   newMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = newMax;
    }

    top = ctxt->spaceNr++;
    ctxt->spaceTab[top] = val;
    ctxt->space = &ctxt->spaceTab[top];
    return(top);
}
2015
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first entry of the table if the stack becomes
 * empty, and the freed slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0) return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2028
2029 /*
2030 * Macros for accessing the content. Those should be used only by the parser,
2031 * and not exported.
2032 *
2033 * Dirty macros, i.e. one often need to make assumption on the context to
2034 * use them
2035 *
2036 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2037 * To be used with extreme caution since operations consuming
2038 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2043 * RAW same as CUR but in the input buffer, bypass any token
2044 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2051 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2052 *
2053 * NEXT Skip to the next character, this does the proper decoding
2054 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2055 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2056 * CUR_CHAR(l) returns the current unicode character (int), set l
2057 * to the number of xmlChars used for the encoding [0-5].
2058 * CUR_SCHAR same but operate on a string instead of the context
2059 * COPY_BUF copy the current unicode char to the target buffer, increment
2060 * the index
2061 * GROW, SHRINK handling of input buffers
2062 */
2063
/*
 * Raw accessors to the current input. They all expand to expressions
 * using a variable named ctxt, which must be in scope at the point of use.
 */
/* Byte at the current position; RAW and CUR expand to the same expression. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current position itself. */
#define CUR_PTR ctxt->input->cur
/* The base pointer of the current input. */
#define BASE_PTR ctxt->input->base
2069
/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at s are c1 ... cn, in
 * that order. Bytes are compared as unsigned char and the comparison
 * stops at the first mismatch. The pointer argument is parenthesized so
 * that any pointer expression can be passed; note that it is expanded
 * once per compared byte and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
2087
/*
 * SKIP(val): move the current position and the column counter forward by
 * val, then try to read more input if the terminating 0 was reached.
 * The line counter is not updated. val is expanded twice and must be free
 * of side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): same as SKIP but advances one byte at a time, bumping the
 * line counter and resetting the column to 1 on each newline. val is
 * parenthesized in the loop test so that any expression can be passed; it
 * is evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2105
/*
 * SHRINK: outside of progressive mode, call xmlSHRINK() when more than
 * 2 * INPUT_CHUNK bytes lie before the current position and fewer than
 * 2 * INPUT_CHUNK bytes remain after it.
 * NOTE(review): this expands to a bare if statement which already ends
 * with a semicolon, so it cannot be used as the body of an if that has
 * an else branch.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2110
/*
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Shrinks the current input, then tries to read more data if the current
 * position ended up on the terminating 0.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if (*ctxt->input->cur != 0)
        return;
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2116
/*
 * GROW: outside of progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain after the current position.
 * NOTE(review): this expands to a bare if statement which already ends
 * with a semicolon, so it cannot be used as the body of an if that has
 * an else branch.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2120
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Tries to read more data into the current input.
 *
 * Unless XML_PARSE_HUGE is set, an input which has a buffer whose read
 * callback is not xmlInputReadCallbackNop raises a fatal error and halts
 * the parser once more than XML_MAX_LOOKUP_LIMIT bytes lie before or
 * after the current position. The parser is also halted if the current
 * position is found outside of the buffer after growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;
    int overLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                    (behind > XML_MAX_LOOKUP_LIMIT);

    if (overLimit &&
        ((ctxt->options & XML_PARSE_HUGE) == 0) &&
        (ctxt->input->buf != NULL) &&
        (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    /*
     * Sanity check of the position against the buffer bounds. Unlike
     * above, the parser is halted before the error is raised; that order
     * is kept on purpose.
     */
    if ((ctxt->input->cur < ctxt->input->base) ||
        (ctxt->input->cur > ctxt->input->end)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    /* still on the terminating 0: try once more */
    if ((ctxt->input->cur == NULL) || (*ctxt->input->cur != 0))
        return;
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2144
/* SKIP_BLANKS: skip blanks at the cursor; evaluates to the count returned
 * by xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor by exactly one byte and the column by one,
 * then grow the input by INPUT_CHUNK if the cursor sits on a 0 byte.
 * No line accounting is done. Expands to a brace block (not
 * do { } while (0)), so a trailing semicolon yields an empty statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): consume one character that is @l bytes long. Line/column
 * bookkeeping is based on the byte currently under the cursor: '\n' starts
 * a new line, anything else advances the column by one (not by @l).
 * The input is not grown here. @l is expanded unparenthesized.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

/* CUR_CHAR(l): current character of the context input via
 * xmlCurrentChar(); @l must be an lvalue, its address is passed. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same idea for a string @s, via xmlStringCurrentChar(). */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append value @v to buffer @b at index @i, advancing
 * @i. When @l is 1 the value is stored as a single xmlChar, otherwise it is
 * written through xmlCopyCharMultiByte() and @i grows by its return value.
 * Passing 0 for @l therefore always selects the multi-byte path.
 * NOTE: expands to an unbraced if/else without a trailing semicolon; use it
 * only as a complete statement, never inside another unbraced if/else.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)

/* CUR_CONSUMED: the input's `consumed` counter plus the cursor offset from
 * the current buffer base. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172
2173 /**
2174 * xmlSkipBlankChars:
2175 * @ctxt: the XML parser context
2176 *
2177 * skip all blanks character found at that point in the input streams.
2178 * It pops up finished entities in the process if allowable at that point.
2179 *
2180 * Returns the number of space chars skipped
2181 */
2182
2183 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2184 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2185 int res = 0;
2186
2187 /*
2188 * It's Okay to use CUR/NEXT here since all the blanks are on
2189 * the ASCII range.
2190 */
2191 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192 (ctxt->instate == XML_PARSER_START)) {
2193 const xmlChar *cur;
2194 /*
2195 * if we are in the document content, go really fast
2196 */
2197 cur = ctxt->input->cur;
2198 while (IS_BLANK_CH(*cur)) {
2199 if (*cur == '\n') {
2200 ctxt->input->line++; ctxt->input->col = 1;
2201 } else {
2202 ctxt->input->col++;
2203 }
2204 cur++;
2205 if (res < INT_MAX)
2206 res++;
2207 if (*cur == 0) {
2208 ctxt->input->cur = cur;
2209 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2210 cur = ctxt->input->cur;
2211 }
2212 }
2213 ctxt->input->cur = cur;
2214 } else {
2215 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216
2217 while (1) {
2218 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219 NEXT;
2220 } else if (CUR == '%') {
2221 /*
2222 * Need to handle support of entities branching here
2223 */
2224 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225 break;
2226 xmlParsePEReference(ctxt);
2227 } else if (CUR == 0) {
2228 if (ctxt->inputNr <= 1)
2229 break;
2230 xmlPopInput(ctxt);
2231 } else {
2232 break;
2233 }
2234
2235 /*
2236 * Also increase the counter when entering or exiting a PERef.
2237 * The spec says: "When a parameter-entity reference is recognized
2238 * in the DTD and included, its replacement text MUST be enlarged
2239 * by the attachment of one leading and one following space (#x20)
2240 * character."
2241 */
2242 if (res < INT_MAX)
2243 res++;
2244 }
2245 }
2246 return(res);
2247 }
2248
2249 /************************************************************************
2250 * *
2251 * Commodity functions to handle entities *
2252 * *
2253 ************************************************************************/
2254
2255 /**
2256 * xmlPopInput:
2257 * @ctxt: an XML parser context
2258 *
2259 * xmlPopInput: the current input pointed by ctxt->input came to an end
2260 * pop it and return the next char.
2261 *
2262 * Returns the current xmlChar in the parser context
2263 */
2264 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2265 xmlPopInput(xmlParserCtxtPtr ctxt) {
2266 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2267 if (xmlParserDebugEntities)
2268 xmlGenericError(xmlGenericErrorContext,
2269 "Popping input %d\n", ctxt->inputNr);
2270 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271 (ctxt->instate != XML_PARSER_EOF))
2272 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2273 "Unfinished entity outside the DTD");
2274 xmlFreeInputStream(inputPop(ctxt));
2275 if (*ctxt->input->cur == 0)
2276 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2277 return(CUR);
2278 }
2279
2280 /**
2281 * xmlPushInput:
2282 * @ctxt: an XML parser context
2283 * @input: an XML parser input fragment (entity, XML fragment ...).
2284 *
2285 * xmlPushInput: switch to a new input stream which is stacked on top
2286 * of the previous one(s).
2287 * Returns -1 in case of error or the index in the input stack
2288 */
2289 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2290 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2291 int ret;
2292 if (input == NULL) return(-1);
2293
2294 if (xmlParserDebugEntities) {
2295 if ((ctxt->input != NULL) && (ctxt->input->filename))
2296 xmlGenericError(xmlGenericErrorContext,
2297 "%s(%d): ", ctxt->input->filename,
2298 ctxt->input->line);
2299 xmlGenericError(xmlGenericErrorContext,
2300 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301 }
2302 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303 (ctxt->inputNr > 1024)) {
2304 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2305 while (ctxt->inputNr > 1)
2306 xmlFreeInputStream(inputPop(ctxt));
2307 return(-1);
2308 }
2309 ret = inputPush(ctxt, input);
2310 if (ctxt->instate == XML_PARSER_EOF)
2311 return(-1);
2312 GROW;
2313 return(ret);
2314 }
2315
2316 /**
2317 * xmlParseCharRef:
2318 * @ctxt: an XML parser context
2319 *
2320 * parse Reference declarations
2321 *
2322 * [66] CharRef ::= '&#' [0-9]+ ';' |
2323 * '&#x' [0-9a-fA-F]+ ';'
2324 *
2325 * [ WFC: Legal Character ]
2326 * Characters referred to using character references must match the
2327 * production for Char.
2328 *
2329 * Returns the value parsed (as an int), 0 in case of error
2330 */
2331 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2332 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2333 int val = 0;
2334 int count = 0;
2335
2336 /*
2337 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338 */
2339 if ((RAW == '&') && (NXT(1) == '#') &&
2340 (NXT(2) == 'x')) {
2341 SKIP(3);
2342 GROW;
2343 while (RAW != ';') { /* loop blocked by count */
2344 if (count++ > 20) {
2345 count = 0;
2346 GROW;
2347 if (ctxt->instate == XML_PARSER_EOF)
2348 return(0);
2349 }
2350 if ((RAW >= '0') && (RAW <= '9'))
2351 val = val * 16 + (CUR - '0');
2352 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353 val = val * 16 + (CUR - 'a') + 10;
2354 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355 val = val * 16 + (CUR - 'A') + 10;
2356 else {
2357 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2358 val = 0;
2359 break;
2360 }
2361 if (val > 0x110000)
2362 val = 0x110000;
2363
2364 NEXT;
2365 count++;
2366 }
2367 if (RAW == ';') {
2368 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369 ctxt->input->col++;
2370 ctxt->input->cur++;
2371 }
2372 } else if ((RAW == '&') && (NXT(1) == '#')) {
2373 SKIP(2);
2374 GROW;
2375 while (RAW != ';') { /* loop blocked by count */
2376 if (count++ > 20) {
2377 count = 0;
2378 GROW;
2379 if (ctxt->instate == XML_PARSER_EOF)
2380 return(0);
2381 }
2382 if ((RAW >= '0') && (RAW <= '9'))
2383 val = val * 10 + (CUR - '0');
2384 else {
2385 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2386 val = 0;
2387 break;
2388 }
2389 if (val > 0x110000)
2390 val = 0x110000;
2391
2392 NEXT;
2393 count++;
2394 }
2395 if (RAW == ';') {
2396 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397 ctxt->input->col++;
2398 ctxt->input->cur++;
2399 }
2400 } else {
2401 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2402 }
2403
2404 /*
2405 * [ WFC: Legal Character ]
2406 * Characters referred to using character references must match the
2407 * production for Char.
2408 */
2409 if (val >= 0x110000) {
2410 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411 "xmlParseCharRef: character reference out of bounds\n",
2412 val);
2413 } else if (IS_CHAR(val)) {
2414 return(val);
2415 } else {
2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417 "xmlParseCharRef: invalid xmlChar value %d\n",
2418 val);
2419 }
2420 return(0);
2421 }
2422
2423 /**
2424 * xmlParseStringCharRef:
2425 * @ctxt: an XML parser context
2426 * @str: a pointer to an index in the string
2427 *
2428 * parse Reference declarations, variant parsing from a string rather
2429 * than an an input flow.
2430 *
2431 * [66] CharRef ::= '&#' [0-9]+ ';' |
2432 * '&#x' [0-9a-fA-F]+ ';'
2433 *
2434 * [ WFC: Legal Character ]
2435 * Characters referred to using character references must match the
2436 * production for Char.
2437 *
2438 * Returns the value parsed (as an int), 0 in case of error, str will be
2439 * updated to the current value of the index
2440 */
2441 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2442 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2443 const xmlChar *ptr;
2444 xmlChar cur;
2445 int val = 0;
2446
2447 if ((str == NULL) || (*str == NULL)) return(0);
2448 ptr = *str;
2449 cur = *ptr;
2450 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451 ptr += 3;
2452 cur = *ptr;
2453 while (cur != ';') { /* Non input consuming loop */
2454 if ((cur >= '0') && (cur <= '9'))
2455 val = val * 16 + (cur - '0');
2456 else if ((cur >= 'a') && (cur <= 'f'))
2457 val = val * 16 + (cur - 'a') + 10;
2458 else if ((cur >= 'A') && (cur <= 'F'))
2459 val = val * 16 + (cur - 'A') + 10;
2460 else {
2461 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2462 val = 0;
2463 break;
2464 }
2465 if (val > 0x110000)
2466 val = 0x110000;
2467
2468 ptr++;
2469 cur = *ptr;
2470 }
2471 if (cur == ';')
2472 ptr++;
2473 } else if ((cur == '&') && (ptr[1] == '#')){
2474 ptr += 2;
2475 cur = *ptr;
2476 while (cur != ';') { /* Non input consuming loops */
2477 if ((cur >= '0') && (cur <= '9'))
2478 val = val * 10 + (cur - '0');
2479 else {
2480 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2481 val = 0;
2482 break;
2483 }
2484 if (val > 0x110000)
2485 val = 0x110000;
2486
2487 ptr++;
2488 cur = *ptr;
2489 }
2490 if (cur == ';')
2491 ptr++;
2492 } else {
2493 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2494 return(0);
2495 }
2496 *str = ptr;
2497
2498 /*
2499 * [ WFC: Legal Character ]
2500 * Characters referred to using character references must match the
2501 * production for Char.
2502 */
2503 if (val >= 0x110000) {
2504 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505 "xmlParseStringCharRef: character reference out of bounds\n",
2506 val);
2507 } else if (IS_CHAR(val)) {
2508 return(val);
2509 } else {
2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512 val);
2513 }
2514 return(0);
2515 }
2516
2517 /**
2518 * xmlParserHandlePEReference:
2519 * @ctxt: the parser context
2520 *
2521 * [69] PEReference ::= '%' Name ';'
2522 *
2523 * [ WFC: No Recursion ]
2524 * A parsed entity must not contain a recursive
2525 * reference to itself, either directly or indirectly.
2526 *
2527 * [ WFC: Entity Declared ]
2528 * In a document without any DTD, a document with only an internal DTD
2529 * subset which contains no parameter entity references, or a document
2530 * with "standalone='yes'", ... ... The declaration of a parameter
2531 * entity must precede any reference to it...
2532 *
2533 * [ VC: Entity Declared ]
2534 * In a document with an external subset or external parameter entities
2535 * with "standalone='no'", ... ... The declaration of a parameter entity
2536 * must precede any reference to it...
2537 *
2538 * [ WFC: In DTD ]
2539 * Parameter-entity references may only appear in the DTD.
2540 * NOTE: misleading but this is handled.
2541 *
2542 * A PEReference may have been detected in the current input stream
2543 * the handling is done accordingly to
2544 * http://www.w3.org/TR/REC-xml#entproc
2545 * i.e.
2546 * - Included in literal in entity values
2547 * - Included as Parameter Entity reference within DTDs
2548 */
2549 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2550 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2551 switch(ctxt->instate) {
2552 case XML_PARSER_CDATA_SECTION:
2553 return;
2554 case XML_PARSER_COMMENT:
2555 return;
2556 case XML_PARSER_START_TAG:
2557 return;
2558 case XML_PARSER_END_TAG:
2559 return;
2560 case XML_PARSER_EOF:
2561 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2562 return;
2563 case XML_PARSER_PROLOG:
2564 case XML_PARSER_START:
2565 case XML_PARSER_MISC:
2566 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2567 return;
2568 case XML_PARSER_ENTITY_DECL:
2569 case XML_PARSER_CONTENT:
2570 case XML_PARSER_ATTRIBUTE_VALUE:
2571 case XML_PARSER_PI:
2572 case XML_PARSER_SYSTEM_LITERAL:
2573 case XML_PARSER_PUBLIC_LITERAL:
2574 /* we just ignore it there */
2575 return;
2576 case XML_PARSER_EPILOG:
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2578 return;
2579 case XML_PARSER_ENTITY_VALUE:
2580 /*
2581 * NOTE: in the case of entity values, we don't do the
2582 * substitution here since we need the literal
2583 * entity value to be able to save the internal
2584 * subset of the document.
2585 * This will be handled by xmlStringDecodeEntities
2586 */
2587 return;
2588 case XML_PARSER_DTD:
2589 /*
2590 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591 * In the internal DTD subset, parameter-entity references
2592 * can occur only where markup declarations can occur, not
2593 * within markup declarations.
2594 * In that case this is handled in xmlParseMarkupDecl
2595 */
2596 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597 return;
2598 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599 return;
2600 break;
2601 case XML_PARSER_IGNORE:
2602 return;
2603 }
2604
2605 xmlParsePEReference(ctxt);
2606 }
2607
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new capacity is twice the current one plus @n. If that computation
 * wraps around (the result is smaller than the current size), or if
 * xmlRealloc() fails, control jumps to the caller's mem_error label. On
 * realloc failure `buffer` still holds the old allocation, since the result
 * is stored in a temporary first.
 * The expansion site must therefore provide: a pointer variable named by
 * @buffer, a matching <buffer>_size variable, and a mem_error label.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2622
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and performs the requested substitutions:
 * character references are always decoded, general entity references are
 * expanded when @what contains XML_SUBSTITUTE_REF and parameter-entity
 * references when it contains XML_SUBSTITUTE_PEREF. Processing stops at
 * the first 0 character, at any of the three end markers, after @len
 * bytes, or when the parser reaches the EOF state.
 *
 * Entity contents are expanded recursively; ctxt->depth tracks the nesting
 * and is limited to 40 (1024 with XML_PARSE_HUGE).
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, when
 *      the depth limit is exceeded, on allocation failure, on an invalid
 *      character reference, or when a nested expansion fails.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /* Guard against runaway recursion through nested entities. */
    if (((ctxt->depth > 40) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Invariant kept by every branch below: after writing, the buffer is
     * grown as soon as fewer than XML_PARSER_BUFFER_SIZE bytes of slack
     * remain, so the next single write always has room.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* Character reference: decoded regardless of @what. */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* l == 0 selects the multi-byte path of COPY_BUF */
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* General entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* Predefined entity: only the first content byte is copied. */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* Entity with content: expand it recursively, then append. */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    /*
                     * The entity content is truncated to an empty string
                     * on failure.
                     * NOTE(review): presumably to keep a broken entity
                     * from being expanded again -- confirm.
                     */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* re-check entity limits each time we must grow */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* Entity without content: emit the reference "&name;" as is. */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
            /* ent == NULL: nothing is appended for this reference */
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* Parameter-entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                    "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * If the content is still NULL here, the nested call
                 * returns NULL (it rejects a NULL string) and the
                 * decoding fails through int_error.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character (or a reference kind not selected by @what). */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* Fetch the next character, treating the @len boundary as a 0. */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    xmlErrMemory(ctxt, NULL);
    /* falls through to the common cleanup */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2819
2820 /**
2821 * xmlStringDecodeEntities:
2822 * @ctxt: the parser context
2823 * @str: the input string
2824 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825 * @end: an end marker xmlChar, 0 if none
2826 * @end2: an end marker xmlChar, 0 if none
2827 * @end3: an end marker xmlChar, 0 if none
2828 *
2829 * Takes a entity string content and process to do the adequate substitutions.
2830 *
2831 * [67] Reference ::= EntityRef | CharRef
2832 *
2833 * [69] PEReference ::= '%' Name ';'
2834 *
2835 * Returns A newly allocated string with the substitution done. The caller
2836 * must deallocate it !
2837 */
2838 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2839 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840 xmlChar end, xmlChar end2, xmlChar end3) {
2841 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843 end, end2, end3));
2844 }
2845
2846 /************************************************************************
2847 * *
2848 * Commodity functions, cleanup needed ? *
2849 * *
2850 ************************************************************************/
2851
2852 /**
2853 * areBlanks:
2854 * @ctxt: an XML parser context
2855 * @str: a xmlChar *
2856 * @len: the size of @str
2857 * @blank_chars: we know the chars are blanks
2858 *
2859 * Is this a sequence of blank chars that one can ignore ?
2860 *
2861 * Returns 1 if ignorable 0 otherwise.
2862 */
2863
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2864 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865 int blank_chars) {
2866 int i, ret;
2867 xmlNodePtr lastChild;
2868
2869 /*
2870 * Don't spend time trying to differentiate them, the same callback is
2871 * used !
2872 */
2873 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874 return(0);
2875
2876 /*
2877 * Check for xml:space value.
2878 */
2879 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880 (*(ctxt->space) == -2))
2881 return(0);
2882
2883 /*
2884 * Check that the string is made of blanks
2885 */
2886 if (blank_chars == 0) {
2887 for (i = 0;i < len;i++)
2888 if (!(IS_BLANK_CH(str[i]))) return(0);
2889 }
2890
2891 /*
2892 * Look if the element is mixed content in the DTD if available
2893 */
2894 if (ctxt->node == NULL) return(0);
2895 if (ctxt->myDoc != NULL) {
2896 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897 if (ret == 0) return(1);
2898 if (ret == 1) return(0);
2899 }
2900
2901 /*
2902 * Otherwise, heuristic :-\
2903 */
2904 if ((RAW != '<') && (RAW != 0xD)) return(0);
2905 if ((ctxt->node->children == NULL) &&
2906 (RAW == '<') && (NXT(1) == '/')) return(0);
2907
2908 lastChild = xmlGetLastChild(ctxt->node);
2909 if (lastChild == NULL) {
2910 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911 (ctxt->node->content != NULL)) return(0);
2912 } else if (xmlNodeIsText(lastChild))
2913 return(0);
2914 else if ((ctxt->node->children != NULL) &&
2915 (xmlNodeIsText(ctxt->node->children)))
2916 return(0);
2917 return(1);
2918 }
2919
2920 /************************************************************************
2921 * *
2922 * Extra stuff for namespace support *
2923 * Relates to http://www.w3.org/TR/WD-xml-names *
2924 * *
2925 ************************************************************************/
2926
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name string to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is split at its first ':'. Names are first collected in a stack
 * buffer of XML_MAX_NAMELEN bytes and moved to a heap buffer when longer.
 * The whole @name is returned unsplit (with *@prefix left NULL) when it
 * starts with ':', when the first ':' is its last character, or, unless
 * XML_XML_NAMESPACE is defined, when it starts with "xml:".
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both strings are newly allocated. NULL is
 *   returned if @prefix or @name is NULL or on allocation failure; note
 *   that if the failure happens while copying the local part, *@prefix
 *   has already been set to the allocated prefix.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * Phase 1: collect bytes up to the first ':' (or the end of the name)
     * in the stack buffer. `cur` always points one past the byte in `c`.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         * Switch to a heap buffer which doubles whenever fewer than 10
         * bytes of room are left.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                                max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* A ':' that is the last character of the name: do not split. */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    /*
     * `ret` now holds what was collected so far: the prefix when a ':'
     * was found, the whole name otherwise.
     */
    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* Phase 2: what was collected is the prefix; now copy the local part. */
        c = *cur;
        *prefix = ret;
        /*
         * c cannot be 0 here: the case of a ':' directly followed by the
         * end of the name already returned above, and neither c nor cur
         * changed since, so this branch is defensive only.
         */
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         * (the error is reported but the split still proceeds)
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                /* NOTE: *prefix stays set on this error path */
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                    max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        /* NOTE: *prefix stays set on this error path */
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3097
3098 /************************************************************************
3099 * *
3100 * The parser itself *
3101 * Relates to http://www.w3.org/TR/REC-xml *
3102 * *
3103 ************************************************************************/
3104
3105 /************************************************************************
3106 * *
3107 * Routines to parse Name, NCName and NmToken *
3108 * *
3109 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, compiled in only for DEBUG builds.
 * nbParseNameComplex is bumped on entry to xmlParseNameComplex();
 * the others are presumably bumped by the parsing routines they are
 * named after (not visible from here).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3118
3119 /*
3120 * The two following functions are related to the change of accepted
3121 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122 * They correspond to the modified production [4] and the new production [4a]
3123 * changes in that revision. Also note that the macros used for the
3124 * productions Letter, Digit, CombiningChar and Extender are not needed
3125 * anymore.
3126 * We still keep compatibility to pre-revision5 parsing semantic if the
3127 * new XML_PARSE_OLD10 option is given to the parser.
3128 */
3129 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3130 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3131 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132 /*
3133 * Use the new checks of production [4] [4a] amd [5] of the
3134 * Update 5 of XML-1.0
3135 */
3136 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137 (((c >= 'a') && (c <= 'z')) ||
3138 ((c >= 'A') && (c <= 'Z')) ||
3139 (c == '_') || (c == ':') ||
3140 ((c >= 0xC0) && (c <= 0xD6)) ||
3141 ((c >= 0xD8) && (c <= 0xF6)) ||
3142 ((c >= 0xF8) && (c <= 0x2FF)) ||
3143 ((c >= 0x370) && (c <= 0x37D)) ||
3144 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145 ((c >= 0x200C) && (c <= 0x200D)) ||
3146 ((c >= 0x2070) && (c <= 0x218F)) ||
3147 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 return(1);
3153 } else {
3154 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155 return(1);
3156 }
3157 return(0);
3158 }
3159
3160 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3161 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3162 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163 /*
3164 * Use the new checks of production [4] [4a] amd [5] of the
3165 * Update 5 of XML-1.0
3166 */
3167 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168 (((c >= 'a') && (c <= 'z')) ||
3169 ((c >= 'A') && (c <= 'Z')) ||
3170 ((c >= '0') && (c <= '9')) || /* !start */
3171 (c == '_') || (c == ':') ||
3172 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173 ((c >= 0xC0) && (c <= 0xD6)) ||
3174 ((c >= 0xD8) && (c <= 0xF6)) ||
3175 ((c >= 0xF8) && (c <= 0x2FF)) ||
3176 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181 ((c >= 0x2070) && (c <= 0x218F)) ||
3182 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186 ((c >= 0x10000) && (c <= 0xEFFFF))))
3187 return(1);
3188 } else {
3189 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190 (c == '.') || (c == '-') ||
3191 (c == '_') || (c == ':') ||
3192 (IS_COMBINING(c)) ||
3193 (IS_EXTENDER(c)))
3194 return(1);
3195 }
3196 return(0);
3197 }
3198
/* Forward declaration; the definition appears later in this file. */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3201
3202 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3203 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3204 int len = 0, l;
3205 int c;
3206 int count = 0;
3207 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3208 XML_MAX_TEXT_LENGTH :
3209 XML_MAX_NAME_LENGTH;
3210
3211 #ifdef DEBUG
3212 nbParseNameComplex++;
3213 #endif
3214
3215 /*
3216 * Handler for more complex cases
3217 */
3218 GROW;
3219 if (ctxt->instate == XML_PARSER_EOF)
3220 return(NULL);
3221 c = CUR_CHAR(l);
3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223 /*
3224 * Use the new checks of production [4] [4a] amd [5] of the
3225 * Update 5 of XML-1.0
3226 */
3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 (!(((c >= 'a') && (c <= 'z')) ||
3229 ((c >= 'A') && (c <= 'Z')) ||
3230 (c == '_') || (c == ':') ||
3231 ((c >= 0xC0) && (c <= 0xD6)) ||
3232 ((c >= 0xD8) && (c <= 0xF6)) ||
3233 ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 ((c >= 0x370) && (c <= 0x37D)) ||
3235 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 ((c >= 0x200C) && (c <= 0x200D)) ||
3237 ((c >= 0x2070) && (c <= 0x218F)) ||
3238 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 return(NULL);
3244 }
3245 len += l;
3246 NEXTL(l);
3247 c = CUR_CHAR(l);
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 (((c >= 'a') && (c <= 'z')) ||
3250 ((c >= 'A') && (c <= 'Z')) ||
3251 ((c >= '0') && (c <= '9')) || /* !start */
3252 (c == '_') || (c == ':') ||
3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 ((c >= 0x370) && (c <= 0x37D)) ||
3259 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 ((c >= 0x200C) && (c <= 0x200D)) ||
3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 ((c >= 0x2070) && (c <= 0x218F)) ||
3263 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 ((c >= 0x10000) && (c <= 0xEFFFF))
3268 )) {
3269 if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 count = 0;
3271 GROW;
3272 if (ctxt->instate == XML_PARSER_EOF)
3273 return(NULL);
3274 }
3275 if (len <= INT_MAX - l)
3276 len += l;
3277 NEXTL(l);
3278 c = CUR_CHAR(l);
3279 }
3280 } else {
3281 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 (!IS_LETTER(c) && (c != '_') &&
3283 (c != ':'))) {
3284 return(NULL);
3285 }
3286 len += l;
3287 NEXTL(l);
3288 c = CUR_CHAR(l);
3289
3290 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 (c == '.') || (c == '-') ||
3293 (c == '_') || (c == ':') ||
3294 (IS_COMBINING(c)) ||
3295 (IS_EXTENDER(c)))) {
3296 if (count++ > XML_PARSER_CHUNK_SIZE) {
3297 count = 0;
3298 GROW;
3299 if (ctxt->instate == XML_PARSER_EOF)
3300 return(NULL);
3301 }
3302 if (len <= INT_MAX - l)
3303 len += l;
3304 NEXTL(l);
3305 c = CUR_CHAR(l);
3306 }
3307 }
3308 if (len > maxLength) {
3309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310 return(NULL);
3311 }
3312 if (ctxt->input->cur - ctxt->input->base < len) {
3313 /*
3314 * There were a couple of bugs where PERefs lead to to a change
3315 * of the buffer. Check the buffer size to avoid passing an invalid
3316 * pointer to xmlDictLookup.
3317 */
3318 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3319 "unexpected change of input buffer");
3320 return (NULL);
3321 }
3322 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3325 }
3326
3327 /**
3328 * xmlParseName:
3329 * @ctxt: an XML parser context
3330 *
3331 * parse an XML name.
3332 *
3333 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334 * CombiningChar | Extender
3335 *
3336 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3337 *
3338 * [6] Names ::= Name (#x20 Name)*
3339 *
3340 * Returns the Name parsed or NULL
3341 */
3342
3343 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3344 xmlParseName(xmlParserCtxtPtr ctxt) {
3345 const xmlChar *in;
3346 const xmlChar *ret;
3347 size_t count = 0;
3348 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349 XML_MAX_TEXT_LENGTH :
3350 XML_MAX_NAME_LENGTH;
3351
3352 GROW;
3353
3354 #ifdef DEBUG
3355 nbParseName++;
3356 #endif
3357
3358 /*
3359 * Accelerator for simple ASCII names
3360 */
3361 in = ctxt->input->cur;
3362 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 (*in == '_') || (*in == ':')) {
3365 in++;
3366 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367 ((*in >= 0x41) && (*in <= 0x5A)) ||
3368 ((*in >= 0x30) && (*in <= 0x39)) ||
3369 (*in == '_') || (*in == '-') ||
3370 (*in == ':') || (*in == '.'))
3371 in++;
3372 if ((*in > 0) && (*in < 0x80)) {
3373 count = in - ctxt->input->cur;
3374 if (count > maxLength) {
3375 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376 return(NULL);
3377 }
3378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379 ctxt->input->cur = in;
3380 ctxt->input->col += count;
3381 if (ret == NULL)
3382 xmlErrMemory(ctxt, NULL);
3383 return(ret);
3384 }
3385 }
3386 /* accelerator for special cases */
3387 return(xmlParseNameComplex(ctxt));
3388 }
3389
3390 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3391 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3392 int len = 0, l;
3393 int c;
3394 int count = 0;
3395 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3396 XML_MAX_TEXT_LENGTH :
3397 XML_MAX_NAME_LENGTH;
3398 size_t startPosition = 0;
3399
3400 #ifdef DEBUG
3401 nbParseNCNameComplex++;
3402 #endif
3403
3404 /*
3405 * Handler for more complex cases
3406 */
3407 GROW;
3408 startPosition = CUR_PTR - BASE_PTR;
3409 c = CUR_CHAR(l);
3410 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412 return(NULL);
3413 }
3414
3415 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3417 if (count++ > XML_PARSER_CHUNK_SIZE) {
3418 count = 0;
3419 GROW;
3420 if (ctxt->instate == XML_PARSER_EOF)
3421 return(NULL);
3422 }
3423 if (len <= INT_MAX - l)
3424 len += l;
3425 NEXTL(l);
3426 c = CUR_CHAR(l);
3427 if (c == 0) {
3428 count = 0;
3429 /*
3430 * when shrinking to extend the buffer we really need to preserve
3431 * the part of the name we already parsed. Hence rolling back
3432 * by current length.
3433 */
3434 ctxt->input->cur -= l;
3435 GROW;
3436 if (ctxt->instate == XML_PARSER_EOF)
3437 return(NULL);
3438 ctxt->input->cur += l;
3439 c = CUR_CHAR(l);
3440 }
3441 }
3442 if (len > maxLength) {
3443 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444 return(NULL);
3445 }
3446 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3447 }
3448
3449 /**
3450 * xmlParseNCName:
3451 * @ctxt: an XML parser context
3452 * @len: length of the string parsed
3453 *
3454 * parse an XML name.
3455 *
3456 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3457 * CombiningChar | Extender
3458 *
3459 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3460 *
3461 * Returns the Name parsed or NULL
3462 */
3463
3464 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3465 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3466 const xmlChar *in, *e;
3467 const xmlChar *ret;
3468 size_t count = 0;
3469 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3470 XML_MAX_TEXT_LENGTH :
3471 XML_MAX_NAME_LENGTH;
3472
3473 #ifdef DEBUG
3474 nbParseNCName++;
3475 #endif
3476
3477 /*
3478 * Accelerator for simple ASCII names
3479 */
3480 in = ctxt->input->cur;
3481 e = ctxt->input->end;
3482 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 (*in == '_')) && (in < e)) {
3485 in++;
3486 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487 ((*in >= 0x41) && (*in <= 0x5A)) ||
3488 ((*in >= 0x30) && (*in <= 0x39)) ||
3489 (*in == '_') || (*in == '-') ||
3490 (*in == '.')) && (in < e))
3491 in++;
3492 if (in >= e)
3493 goto complex;
3494 if ((*in > 0) && (*in < 0x80)) {
3495 count = in - ctxt->input->cur;
3496 if (count > maxLength) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498 return(NULL);
3499 }
3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->input->col += count;
3503 if (ret == NULL) {
3504 xmlErrMemory(ctxt, NULL);
3505 }
3506 return(ret);
3507 }
3508 }
3509 complex:
3510 return(xmlParseNCNameComplex(ctxt));
3511 }
3512
3513 /**
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3516 *
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3519 *
3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
3522 */
3523
3524 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
3528 const xmlChar *ret;
3529
3530 GROW;
3531 if (ctxt->instate == XML_PARSER_EOF)
3532 return(NULL);
3533
3534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
3536 ++in;
3537 ++cmp;
3538 }
3539 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540 /* success */
3541 ctxt->input->col += in - ctxt->input->cur;
3542 ctxt->input->cur = in;
3543 return (const xmlChar*) 1;
3544 }
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
3547 /* strings coming from the dictionary direct compare possible */
3548 if (ret == other) {
3549 return (const xmlChar*) 1;
3550 }
3551 return ret;
3552 }
3553
3554 /**
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3558 *
3559 * parse an XML name.
3560 *
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3563 *
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565 *
3566 * [6] Names ::= Name (#x20 Name)*
3567 *
3568 * Returns the Name parsed or NULL. The @str pointer
3569 * is updated to the current location in the string.
3570 */
3571
3572 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574 xmlChar buf[XML_MAX_NAMELEN + 5];
3575 const xmlChar *cur = *str;
3576 int len = 0, l;
3577 int c;
3578 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3579 XML_MAX_TEXT_LENGTH :
3580 XML_MAX_NAME_LENGTH;
3581
3582 #ifdef DEBUG
3583 nbParseStringName++;
3584 #endif
3585
3586 c = CUR_SCHAR(cur, l);
3587 if (!xmlIsNameStartChar(ctxt, c)) {
3588 return(NULL);
3589 }
3590
3591 COPY_BUF(l,buf,len,c);
3592 cur += l;
3593 c = CUR_SCHAR(cur, l);
3594 while (xmlIsNameChar(ctxt, c)) {
3595 COPY_BUF(l,buf,len,c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599 /*
3600 * Okay someone managed to make a huge name, so he's ready to pay
3601 * for the processing speed.
3602 */
3603 xmlChar *buffer;
3604 int max = len * 2;
3605
3606 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607 if (buffer == NULL) {
3608 xmlErrMemory(ctxt, NULL);
3609 return(NULL);
3610 }
3611 memcpy(buffer, buf, len);
3612 while (xmlIsNameChar(ctxt, c)) {
3613 if (len + 10 > max) {
3614 xmlChar *tmp;
3615
3616 max *= 2;
3617 tmp = (xmlChar *) xmlRealloc(buffer,
3618 max * sizeof(xmlChar));
3619 if (tmp == NULL) {
3620 xmlErrMemory(ctxt, NULL);
3621 xmlFree(buffer);
3622 return(NULL);
3623 }
3624 buffer = tmp;
3625 }
3626 COPY_BUF(l,buffer,len,c);
3627 cur += l;
3628 c = CUR_SCHAR(cur, l);
3629 if (len > maxLength) {
3630 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631 xmlFree(buffer);
3632 return(NULL);
3633 }
3634 }
3635 buffer[len] = 0;
3636 *str = cur;
3637 return(buffer);
3638 }
3639 }
3640 if (len > maxLength) {
3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642 return(NULL);
3643 }
3644 *str = cur;
3645 return(xmlStrndup(buf, len));
3646 }
3647
3648 /**
3649 * xmlParseNmtoken:
3650 * @ctxt: an XML parser context
3651 *
3652 * parse an XML Nmtoken.
3653 *
3654 * [7] Nmtoken ::= (NameChar)+
3655 *
3656 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3657 *
3658 * Returns the Nmtoken parsed or NULL
3659 */
3660
3661 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3662 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3663 xmlChar buf[XML_MAX_NAMELEN + 5];
3664 int len = 0, l;
3665 int c;
3666 int count = 0;
3667 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3668 XML_MAX_TEXT_LENGTH :
3669 XML_MAX_NAME_LENGTH;
3670
3671 #ifdef DEBUG
3672 nbParseNmToken++;
3673 #endif
3674
3675 GROW;
3676 if (ctxt->instate == XML_PARSER_EOF)
3677 return(NULL);
3678 c = CUR_CHAR(l);
3679
3680 while (xmlIsNameChar(ctxt, c)) {
3681 if (count++ > XML_PARSER_CHUNK_SIZE) {
3682 count = 0;
3683 GROW;
3684 }
3685 COPY_BUF(l,buf,len,c);
3686 NEXTL(l);
3687 c = CUR_CHAR(l);
3688 if (c == 0) {
3689 count = 0;
3690 GROW;
3691 if (ctxt->instate == XML_PARSER_EOF)
3692 return(NULL);
3693 c = CUR_CHAR(l);
3694 }
3695 if (len >= XML_MAX_NAMELEN) {
3696 /*
3697 * Okay someone managed to make a huge token, so he's ready to pay
3698 * for the processing speed.
3699 */
3700 xmlChar *buffer;
3701 int max = len * 2;
3702
3703 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704 if (buffer == NULL) {
3705 xmlErrMemory(ctxt, NULL);
3706 return(NULL);
3707 }
3708 memcpy(buffer, buf, len);
3709 while (xmlIsNameChar(ctxt, c)) {
3710 if (count++ > XML_PARSER_CHUNK_SIZE) {
3711 count = 0;
3712 GROW;
3713 if (ctxt->instate == XML_PARSER_EOF) {
3714 xmlFree(buffer);
3715 return(NULL);
3716 }
3717 }
3718 if (len + 10 > max) {
3719 xmlChar *tmp;
3720
3721 max *= 2;
3722 tmp = (xmlChar *) xmlRealloc(buffer,
3723 max * sizeof(xmlChar));
3724 if (tmp == NULL) {
3725 xmlErrMemory(ctxt, NULL);
3726 xmlFree(buffer);
3727 return(NULL);
3728 }
3729 buffer = tmp;
3730 }
3731 COPY_BUF(l,buffer,len,c);
3732 NEXTL(l);
3733 c = CUR_CHAR(l);
3734 if (len > maxLength) {
3735 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736 xmlFree(buffer);
3737 return(NULL);
3738 }
3739 }
3740 buffer[len] = 0;
3741 return(buffer);
3742 }
3743 }
3744 if (len == 0)
3745 return(NULL);
3746 if (len > maxLength) {
3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748 return(NULL);
3749 }
3750 return(xmlStrndup(buf, len));
3751 }
3752
3753 /**
3754 * xmlParseEntityValue:
3755 * @ctxt: an XML parser context
3756 * @orig: if non-NULL store a copy of the original entity value
3757 *
3758 * parse a value for ENTITY declarations
3759 *
3760 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761 * "'" ([^%&'] | PEReference | Reference)* "'"
3762 *
3763 * Returns the EntityValue parsed with reference substituted or NULL
3764 */
3765
3766 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3767 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3768 xmlChar *buf = NULL;
3769 int len = 0;
3770 int size = XML_PARSER_BUFFER_SIZE;
3771 int c, l;
3772 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3773 XML_MAX_HUGE_LENGTH :
3774 XML_MAX_TEXT_LENGTH;
3775 xmlChar stop;
3776 xmlChar *ret = NULL;
3777 const xmlChar *cur = NULL;
3778 xmlParserInputPtr input;
3779
3780 if (RAW == '"') stop = '"';
3781 else if (RAW == '\'') stop = '\'';
3782 else {
3783 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3784 return(NULL);
3785 }
3786 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787 if (buf == NULL) {
3788 xmlErrMemory(ctxt, NULL);
3789 return(NULL);
3790 }
3791
3792 /*
3793 * The content of the entity definition is copied in a buffer.
3794 */
3795
3796 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3797 input = ctxt->input;
3798 GROW;
3799 if (ctxt->instate == XML_PARSER_EOF)
3800 goto error;
3801 NEXT;
3802 c = CUR_CHAR(l);
3803 /*
3804 * NOTE: 4.4.5 Included in Literal
3805 * When a parameter entity reference appears in a literal entity
3806 * value, ... a single or double quote character in the replacement
3807 * text is always treated as a normal data character and will not
3808 * terminate the literal.
3809 * In practice it means we stop the loop only when back at parsing
3810 * the initial entity and the quote is found
3811 */
3812 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3814 if (len + 5 >= size) {
3815 xmlChar *tmp;
3816
3817 size *= 2;
3818 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819 if (tmp == NULL) {
3820 xmlErrMemory(ctxt, NULL);
3821 goto error;
3822 }
3823 buf = tmp;
3824 }
3825 COPY_BUF(l,buf,len,c);
3826 NEXTL(l);
3827
3828 GROW;
3829 c = CUR_CHAR(l);
3830 if (c == 0) {
3831 GROW;
3832 c = CUR_CHAR(l);
3833 }
3834
3835 if (len > maxLength) {
3836 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837 "entity value too long\n");
3838 goto error;
3839 }
3840 }
3841 buf[len] = 0;
3842 if (ctxt->instate == XML_PARSER_EOF)
3843 goto error;
3844 if (c != stop) {
3845 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3846 goto error;
3847 }
3848 NEXT;
3849
3850 /*
3851 * Raise problem w.r.t. '&' and '%' being used in non-entities
3852 * reference constructs. Note Charref will be handled in
3853 * xmlStringDecodeEntities()
3854 */
3855 cur = buf;
3856 while (*cur != 0) { /* non input consuming */
3857 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858 xmlChar *name;
3859 xmlChar tmp = *cur;
3860 int nameOk = 0;
3861
3862 cur++;
3863 name = xmlParseStringName(ctxt, &cur);
3864 if (name != NULL) {
3865 nameOk = 1;
3866 xmlFree(name);
3867 }
3868 if ((nameOk == 0) || (*cur != ';')) {
3869 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870 "EntityValue: '%c' forbidden except for entities references\n",
3871 tmp);
3872 goto error;
3873 }
3874 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875 (ctxt->inputNr == 1)) {
3876 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3877 goto error;
3878 }
3879 if (*cur == 0)
3880 break;
3881 }
3882 cur++;
3883 }
3884
3885 /*
3886 * Then PEReference entities are substituted.
3887 *
3888 * NOTE: 4.4.7 Bypassed
3889 * When a general entity reference appears in the EntityValue in
3890 * an entity declaration, it is bypassed and left as is.
3891 * so XML_SUBSTITUTE_REF is not set here.
3892 */
3893 ++ctxt->depth;
3894 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3895 0, 0, 0);
3896 --ctxt->depth;
3897 if (orig != NULL) {
3898 *orig = buf;
3899 buf = NULL;
3900 }
3901
3902 error:
3903 if (buf != NULL)
3904 xmlFree(buf);
3905 return(ret);
3906 }
3907
3908 /**
3909 * xmlParseAttValueComplex:
3910 * @ctxt: an XML parser context
3911 * @len: the resulting attribute len
3912 * @normalize: whether to apply the inner normalization
3913 *
3914 * parse a value for an attribute, this is the fallback function
3915 * of xmlParseAttValue() when the attribute parsing requires handling
3916 * of non-ASCII characters, or normalization compaction.
3917 *
3918 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3919 */
3920 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3921 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3922 xmlChar limit = 0;
3923 xmlChar *buf = NULL;
3924 xmlChar *rep = NULL;
3925 size_t len = 0;
3926 size_t buf_size = 0;
3927 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3928 XML_MAX_HUGE_LENGTH :
3929 XML_MAX_TEXT_LENGTH;
3930 int c, l, in_space = 0;
3931 xmlChar *current = NULL;
3932 xmlEntityPtr ent;
3933
3934 if (NXT(0) == '"') {
3935 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3936 limit = '"';
3937 NEXT;
3938 } else if (NXT(0) == '\'') {
3939 limit = '\'';
3940 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3941 NEXT;
3942 } else {
3943 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3944 return(NULL);
3945 }
3946
3947 /*
3948 * allocate a translation buffer.
3949 */
3950 buf_size = XML_PARSER_BUFFER_SIZE;
3951 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952 if (buf == NULL) goto mem_error;
3953
3954 /*
3955 * OK loop until we reach one of the ending char or a size limit.
3956 */
3957 c = CUR_CHAR(l);
3958 while (((NXT(0) != limit) && /* checked */
3959 (IS_CHAR(c)) && (c != '<')) &&
3960 (ctxt->instate != XML_PARSER_EOF)) {
3961 if (c == '&') {
3962 in_space = 0;
3963 if (NXT(1) == '#') {
3964 int val = xmlParseCharRef(ctxt);
3965
3966 if (val == '&') {
3967 if (ctxt->replaceEntities) {
3968 if (len + 10 > buf_size) {
3969 growBuffer(buf, 10);
3970 }
3971 buf[len++] = '&';
3972 } else {
3973 /*
3974 * The reparsing will be done in xmlStringGetNodeList()
3975 * called by the attribute() function in SAX.c
3976 */
3977 if (len + 10 > buf_size) {
3978 growBuffer(buf, 10);
3979 }
3980 buf[len++] = '&';
3981 buf[len++] = '#';
3982 buf[len++] = '3';
3983 buf[len++] = '8';
3984 buf[len++] = ';';
3985 }
3986 } else if (val != 0) {
3987 if (len + 10 > buf_size) {
3988 growBuffer(buf, 10);
3989 }
3990 len += xmlCopyChar(0, &buf[len], val);
3991 }
3992 } else {
3993 ent = xmlParseEntityRef(ctxt);
3994 ctxt->nbentities++;
3995 if (ent != NULL)
3996 ctxt->nbentities += ent->owner;
3997 if ((ent != NULL) &&
3998 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3999 if (len + 10 > buf_size) {
4000 growBuffer(buf, 10);
4001 }
4002 if ((ctxt->replaceEntities == 0) &&
4003 (ent->content[0] == '&')) {
4004 buf[len++] = '&';
4005 buf[len++] = '#';
4006 buf[len++] = '3';
4007 buf[len++] = '8';
4008 buf[len++] = ';';
4009 } else {
4010 buf[len++] = ent->content[0];
4011 }
4012 } else if ((ent != NULL) &&
4013 (ctxt->replaceEntities != 0)) {
4014 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4015 ++ctxt->depth;
4016 rep = xmlStringDecodeEntities(ctxt, ent->content,
4017 XML_SUBSTITUTE_REF,
4018 0, 0, 0);
4019 --ctxt->depth;
4020 if (rep != NULL) {
4021 current = rep;
4022 while (*current != 0) { /* non input consuming */
4023 if ((*current == 0xD) || (*current == 0xA) ||
4024 (*current == 0x9)) {
4025 buf[len++] = 0x20;
4026 current++;
4027 } else
4028 buf[len++] = *current++;
4029 if (len + 10 > buf_size) {
4030 growBuffer(buf, 10);
4031 }
4032 }
4033 xmlFree(rep);
4034 rep = NULL;
4035 }
4036 } else {
4037 if (len + 10 > buf_size) {
4038 growBuffer(buf, 10);
4039 }
4040 if (ent->content != NULL)
4041 buf[len++] = ent->content[0];
4042 }
4043 } else if (ent != NULL) {
4044 int i = xmlStrlen(ent->name);
4045 const xmlChar *cur = ent->name;
4046
4047 /*
4048 * This may look absurd but is needed to detect
4049 * entities problems
4050 */
4051 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052 (ent->content != NULL) && (ent->checked == 0)) {
4053 unsigned long oldnbent = ctxt->nbentities, diff;
4054
4055 ++ctxt->depth;
4056 rep = xmlStringDecodeEntities(ctxt, ent->content,
4057 XML_SUBSTITUTE_REF, 0, 0, 0);
4058 --ctxt->depth;
4059
4060 diff = ctxt->nbentities - oldnbent + 1;
4061 if (diff > INT_MAX / 2)
4062 diff = INT_MAX / 2;
4063 ent->checked = diff * 2;
4064 if (rep != NULL) {
4065 if (xmlStrchr(rep, '<'))
4066 ent->checked |= 1;
4067 xmlFree(rep);
4068 rep = NULL;
4069 } else {
4070 ent->content[0] = 0;
4071 }
4072 }
4073
4074 /*
4075 * Just output the reference
4076 */
4077 buf[len++] = '&';
4078 while (len + i + 10 > buf_size) {
4079 growBuffer(buf, i + 10);
4080 }
4081 for (;i > 0;i--)
4082 buf[len++] = *cur++;
4083 buf[len++] = ';';
4084 }
4085 }
4086 } else {
4087 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4088 if ((len != 0) || (!normalize)) {
4089 if ((!normalize) || (!in_space)) {
4090 COPY_BUF(l,buf,len,0x20);
4091 while (len + 10 > buf_size) {
4092 growBuffer(buf, 10);
4093 }
4094 }
4095 in_space = 1;
4096 }
4097 } else {
4098 in_space = 0;
4099 COPY_BUF(l,buf,len,c);
4100 if (len + 10 > buf_size) {
4101 growBuffer(buf, 10);
4102 }
4103 }
4104 NEXTL(l);
4105 }
4106 GROW;
4107 c = CUR_CHAR(l);
4108 if (len > maxLength) {
4109 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110 "AttValue length too long\n");
4111 goto mem_error;
4112 }
4113 }
4114 if (ctxt->instate == XML_PARSER_EOF)
4115 goto error;
4116
4117 if ((in_space) && (normalize)) {
4118 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4119 }
4120 buf[len] = 0;
4121 if (RAW == '<') {
4122 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4123 } else if (RAW != limit) {
4124 if ((c != 0) && (!IS_CHAR(c))) {
4125 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126 "invalid character in attribute value\n");
4127 } else {
4128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129 "AttValue: ' expected\n");
4130 }
4131 } else
4132 NEXT;
4133
4134 if (attlen != NULL) *attlen = (int) len;
4135 return(buf);
4136
4137 mem_error:
4138 xmlErrMemory(ctxt, NULL);
4139 error:
4140 if (buf != NULL)
4141 xmlFree(buf);
4142 if (rep != NULL)
4143 xmlFree(rep);
4144 return(NULL);
4145 }
4146
4147 /**
4148 * xmlParseAttValue:
4149 * @ctxt: an XML parser context
4150 *
4151 * parse a value for an attribute
4152 * Note: the parser won't do substitution of entities here, this
4153 * will be handled later in xmlStringGetNodeList
4154 *
4155 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4156 * "'" ([^<&'] | Reference)* "'"
4157 *
4158 * 3.3.3 Attribute-Value Normalization:
4159 * Before the value of an attribute is passed to the application or
4160 * checked for validity, the XML processor must normalize it as follows:
4161 * - a character reference is processed by appending the referenced
4162 * character to the attribute value
4163 * - an entity reference is processed by recursively processing the
4164 * replacement text of the entity
4165 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4166 * appending #x20 to the normalized value, except that only a single
4167 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4168 * parsed entity or the literal entity value of an internal parsed entity
4169 * - other characters are processed by appending them to the normalized value
4170 * If the declared value is not CDATA, then the XML processor must further
4171 * process the normalized attribute value by discarding any leading and
4172 * trailing space (#x20) characters, and by replacing sequences of space
4173 * (#x20) characters by a single space (#x20) character.
4174 * All attributes for which no declaration has been read should be treated
4175 * by a non-validating parser as if declared CDATA.
4176 *
4177 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4178 */
4179
4180
4181 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4182 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4183 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4185 }
4186
4187 /**
4188 * xmlParseSystemLiteral:
4189 * @ctxt: an XML parser context
4190 *
4191 * parse an XML Literal
4192 *
4193 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4194 *
4195 * Returns the SystemLiteral parsed or NULL
4196 */
4197
4198 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4199 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4200 xmlChar *buf = NULL;
4201 int len = 0;
4202 int size = XML_PARSER_BUFFER_SIZE;
4203 int cur, l;
4204 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4205 XML_MAX_TEXT_LENGTH :
4206 XML_MAX_NAME_LENGTH;
4207 xmlChar stop;
4208 int state = ctxt->instate;
4209 int count = 0;
4210
4211 SHRINK;
4212 if (RAW == '"') {
4213 NEXT;
4214 stop = '"';
4215 } else if (RAW == '\'') {
4216 NEXT;
4217 stop = '\'';
4218 } else {
4219 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4220 return(NULL);
4221 }
4222
4223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224 if (buf == NULL) {
4225 xmlErrMemory(ctxt, NULL);
4226 return(NULL);
4227 }
4228 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4229 cur = CUR_CHAR(l);
4230 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4231 if (len + 5 >= size) {
4232 xmlChar *tmp;
4233
4234 size *= 2;
4235 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236 if (tmp == NULL) {
4237 xmlFree(buf);
4238 xmlErrMemory(ctxt, NULL);
4239 ctxt->instate = (xmlParserInputState) state;
4240 return(NULL);
4241 }
4242 buf = tmp;
4243 }
4244 count++;
4245 if (count > 50) {
4246 SHRINK;
4247 GROW;
4248 count = 0;
4249 if (ctxt->instate == XML_PARSER_EOF) {
4250 xmlFree(buf);
4251 return(NULL);
4252 }
4253 }
4254 COPY_BUF(l,buf,len,cur);
4255 NEXTL(l);
4256 cur = CUR_CHAR(l);
4257 if (cur == 0) {
4258 GROW;
4259 SHRINK;
4260 cur = CUR_CHAR(l);
4261 }
4262 if (len > maxLength) {
4263 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264 xmlFree(buf);
4265 ctxt->instate = (xmlParserInputState) state;
4266 return(NULL);
4267 }
4268 }
4269 buf[len] = 0;
4270 ctxt->instate = (xmlParserInputState) state;
4271 if (!IS_CHAR(cur)) {
4272 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4273 } else {
4274 NEXT;
4275 }
4276 return(buf);
4277 }
4278
4279 /**
4280 * xmlParsePubidLiteral:
4281 * @ctxt: an XML parser context
4282 *
4283 * parse an XML public literal
4284 *
4285 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4286 *
4287 * Returns the PubidLiteral parsed or NULL.
4288 */
4289
4290 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4291 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4292 xmlChar *buf = NULL;
4293 int len = 0;
4294 int size = XML_PARSER_BUFFER_SIZE;
4295 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4296 XML_MAX_TEXT_LENGTH :
4297 XML_MAX_NAME_LENGTH;
4298 xmlChar cur;
4299 xmlChar stop;
4300 int count = 0;
4301 xmlParserInputState oldstate = ctxt->instate;
4302
4303 SHRINK;
4304 if (RAW == '"') {
4305 NEXT;
4306 stop = '"';
4307 } else if (RAW == '\'') {
4308 NEXT;
4309 stop = '\'';
4310 } else {
4311 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4312 return(NULL);
4313 }
4314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315 if (buf == NULL) {
4316 xmlErrMemory(ctxt, NULL);
4317 return(NULL);
4318 }
4319 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4320 cur = CUR;
4321 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4322 if (len + 1 >= size) {
4323 xmlChar *tmp;
4324
4325 size *= 2;
4326 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327 if (tmp == NULL) {
4328 xmlErrMemory(ctxt, NULL);
4329 xmlFree(buf);
4330 return(NULL);
4331 }
4332 buf = tmp;
4333 }
4334 buf[len++] = cur;
4335 count++;
4336 if (count > 50) {
4337 SHRINK;
4338 GROW;
4339 count = 0;
4340 if (ctxt->instate == XML_PARSER_EOF) {
4341 xmlFree(buf);
4342 return(NULL);
4343 }
4344 }
4345 NEXT;
4346 cur = CUR;
4347 if (cur == 0) {
4348 GROW;
4349 SHRINK;
4350 cur = CUR;
4351 }
4352 if (len > maxLength) {
4353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354 xmlFree(buf);
4355 return(NULL);
4356 }
4357 }
4358 buf[len] = 0;
4359 if (cur != stop) {
4360 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4361 } else {
4362 NEXT;
4363 }
4364 ctxt->instate = oldstate;
4365 return(buf);
4366 }
4367
4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4369
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Indexed by byte value: an entry equal to its index marks a
 * byte listed as plain character data, 0 marks every other byte. Only
 * 0x09 and 0x20-0x7F are listed, except '&', '<' and ']'; CR and LF are
 * 0 here (the original table notes them as "CR/LF separated").
 * One row per 16 byte values.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 (tab) */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0x09, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: '&' (0x26) excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4407
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated path scans the input buffer in
 * place and hands runs of bytes straight to the SAX callbacks without
 * copying them.  As soon as that path meets a byte it does not handle,
 * or when @cdata is non-zero, the work is passed on to
 * xmlParseCharDataComplex().
 *
 * NOTE(review): the accelerated path invokes the SAX callbacks without
 * testing ctxt->disableSAX, whereas xmlParseCharDataComplex() does test
 * it - confirm whether this difference is intended.
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position to report if we have to fall back to the slow path */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* step over leading spaces and line feeds, tracking line/col */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * The run consisted only of spaces and line feeds and is
                 * followed by markup: deliver it and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* advance the input before invoking any callback */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* consume every byte the lookup table lets through */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside a CDATA section */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                /* a lone ']' is ordinary character data */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank, so it might be
                     * ignorable whitespace: let areBlanks() decide.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* remember the position reached by delivered data */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: the input now starts at the LF, so the
                     * CR is left out of the next run.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: back up, it is not handled on this path */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* SHRINK/GROW may have moved the buffer: reload the cursor */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /*
     * Fall back to the generic routine, after putting line/col back to
     * the values recorded above.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4565
4566 /**
4567 * xmlParseCharDataComplex:
4568 * @ctxt: an XML parser context
4569 * @cdata: int indicating whether we are within a CDATA section
4570 *
4571 * parse a CharData section.this is the fallback function
4572 * of xmlParseCharData() when the parsing requires handling
4573 * of non-ASCII characters.
4574 */
4575 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4576 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4577 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4578 int nbchar = 0;
4579 int cur, l;
4580 int count = 0;
4581
4582 SHRINK;
4583 GROW;
4584 cur = CUR_CHAR(l);
4585 while ((cur != '<') && /* checked */
4586 (cur != '&') &&
4587 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4588 if ((cur == ']') && (NXT(1) == ']') &&
4589 (NXT(2) == '>')) {
4590 if (cdata) break;
4591 else {
4592 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4593 }
4594 }
4595 COPY_BUF(l,buf,nbchar,cur);
4596 /* move current position before possible calling of ctxt->sax->characters */
4597 NEXTL(l);
4598 cur = CUR_CHAR(l);
4599 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600 buf[nbchar] = 0;
4601
4602 /*
4603 * OK the segment is to be consumed as chars.
4604 */
4605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606 if (areBlanks(ctxt, buf, nbchar, 0)) {
4607 if (ctxt->sax->ignorableWhitespace != NULL)
4608 ctxt->sax->ignorableWhitespace(ctxt->userData,
4609 buf, nbchar);
4610 } else {
4611 if (ctxt->sax->characters != NULL)
4612 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613 if ((ctxt->sax->characters !=
4614 ctxt->sax->ignorableWhitespace) &&
4615 (*ctxt->space == -1))
4616 *ctxt->space = -2;
4617 }
4618 }
4619 nbchar = 0;
4620 /* something really bad happened in the SAX callback */
4621 if (ctxt->instate != XML_PARSER_CONTENT)
4622 return;
4623 }
4624 count++;
4625 if (count > 50) {
4626 SHRINK;
4627 GROW;
4628 count = 0;
4629 if (ctxt->instate == XML_PARSER_EOF)
4630 return;
4631 }
4632 }
4633 if (nbchar != 0) {
4634 buf[nbchar] = 0;
4635 /*
4636 * OK the segment is to be consumed as chars.
4637 */
4638 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639 if (areBlanks(ctxt, buf, nbchar, 0)) {
4640 if (ctxt->sax->ignorableWhitespace != NULL)
4641 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642 } else {
4643 if (ctxt->sax->characters != NULL)
4644 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646 (*ctxt->space == -1))
4647 *ctxt->space = -2;
4648 }
4649 }
4650 }
4651 if ((cur != 0) && (!IS_CHAR(cur))) {
4652 /* Generate the error and skip the offending character */
4653 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654 "PCDATA invalid Char value %d\n",
4655 cur);
4656 NEXTL(l);
4657 }
4658 }
4659
4660 /**
4661 * xmlParseExternalID:
4662 * @ctxt: an XML parser context
4663 * @publicID: a xmlChar** receiving PubidLiteral
4664 * @strict: indicate whether we should restrict parsing to only
4665 * production [75], see NOTE below
4666 *
4667 * Parse an External ID or a Public ID
4668 *
4669 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4670 * 'PUBLIC' S PubidLiteral S SystemLiteral
4671 *
4672 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4673 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4674 *
4675 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4676 *
4677 * Returns the function returns SystemLiteral and in the second
4678 * case publicID receives PubidLiteral, is strict is off
4679 * it is possible to return NULL and have publicID set.
4680 */
4681
4682 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4683 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4684 xmlChar *URI = NULL;
4685
4686 SHRINK;
4687
4688 *publicID = NULL;
4689 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690 SKIP(6);
4691 if (SKIP_BLANKS == 0) {
4692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693 "Space required after 'SYSTEM'\n");
4694 }
4695 URI = xmlParseSystemLiteral(ctxt);
4696 if (URI == NULL) {
4697 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4698 }
4699 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700 SKIP(6);
4701 if (SKIP_BLANKS == 0) {
4702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'PUBLIC'\n");
4704 }
4705 *publicID = xmlParsePubidLiteral(ctxt);
4706 if (*publicID == NULL) {
4707 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4708 }
4709 if (strict) {
4710 /*
4711 * We don't handle [83] so "S SystemLiteral" is required.
4712 */
4713 if (SKIP_BLANKS == 0) {
4714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4715 "Space required after the Public Identifier\n");
4716 }
4717 } else {
4718 /*
4719 * We handle [83] so we return immediately, if
4720 * "S SystemLiteral" is not detected. We skip blanks if no
4721 * system literal was found, but this is harmless since we must
4722 * be at the end of a NotationDecl.
4723 */
4724 if (SKIP_BLANKS == 0) return(NULL);
4725 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4726 }
4727 URI = xmlParseSystemLiteral(ctxt);
4728 if (URI == NULL) {
4729 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4730 }
4731 }
4732 return(URI);
4733 }
4734
4735 /**
4736 * xmlParseCommentComplex:
4737 * @ctxt: an XML parser context
4738 * @buf: the already parsed part of the buffer
4739 * @len: number of bytes in the buffer
4740 * @size: allocated size of the buffer
4741 *
4742 * Skip an XML (SGML) comment <!-- .... -->
4743 * The spec says that "For compatibility, the string "--" (double-hyphen)
4744 * must not occur within comments. "
4745 * This is the slow routine in case the accelerator for ascii didn't work
4746 *
4747 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4748 */
4749 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4750 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4751 size_t len, size_t size) {
4752 int q, ql;
4753 int r, rl;
4754 int cur, l;
4755 size_t count = 0;
4756 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4757 XML_MAX_HUGE_LENGTH :
4758 XML_MAX_TEXT_LENGTH;
4759 int inputid;
4760
4761 inputid = ctxt->input->id;
4762
4763 if (buf == NULL) {
4764 len = 0;
4765 size = XML_PARSER_BUFFER_SIZE;
4766 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4767 if (buf == NULL) {
4768 xmlErrMemory(ctxt, NULL);
4769 return;
4770 }
4771 }
4772 GROW; /* Assure there's enough input data */
4773 q = CUR_CHAR(ql);
4774 if (q == 0)
4775 goto not_terminated;
4776 if (!IS_CHAR(q)) {
4777 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4778 "xmlParseComment: invalid xmlChar value %d\n",
4779 q);
4780 xmlFree (buf);
4781 return;
4782 }
4783 NEXTL(ql);
4784 r = CUR_CHAR(rl);
4785 if (r == 0)
4786 goto not_terminated;
4787 if (!IS_CHAR(r)) {
4788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789 "xmlParseComment: invalid xmlChar value %d\n",
4790 q);
4791 xmlFree (buf);
4792 return;
4793 }
4794 NEXTL(rl);
4795 cur = CUR_CHAR(l);
4796 if (cur == 0)
4797 goto not_terminated;
4798 while (IS_CHAR(cur) && /* checked */
4799 ((cur != '>') ||
4800 (r != '-') || (q != '-'))) {
4801 if ((r == '-') && (q == '-')) {
4802 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4803 }
4804 if (len + 5 >= size) {
4805 xmlChar *new_buf;
4806 size_t new_size;
4807
4808 new_size = size * 2;
4809 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4810 if (new_buf == NULL) {
4811 xmlFree (buf);
4812 xmlErrMemory(ctxt, NULL);
4813 return;
4814 }
4815 buf = new_buf;
4816 size = new_size;
4817 }
4818 COPY_BUF(ql,buf,len,q);
4819 q = r;
4820 ql = rl;
4821 r = cur;
4822 rl = l;
4823
4824 count++;
4825 if (count > 50) {
4826 SHRINK;
4827 GROW;
4828 count = 0;
4829 if (ctxt->instate == XML_PARSER_EOF) {
4830 xmlFree(buf);
4831 return;
4832 }
4833 }
4834 NEXTL(l);
4835 cur = CUR_CHAR(l);
4836 if (cur == 0) {
4837 SHRINK;
4838 GROW;
4839 cur = CUR_CHAR(l);
4840 }
4841
4842 if (len > maxLength) {
4843 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4844 "Comment too big found", NULL);
4845 xmlFree (buf);
4846 return;
4847 }
4848 }
4849 buf[len] = 0;
4850 if (cur == 0) {
4851 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4852 "Comment not terminated \n<!--%.50s\n", buf);
4853 } else if (!IS_CHAR(cur)) {
4854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4855 "xmlParseComment: invalid xmlChar value %d\n",
4856 cur);
4857 } else {
4858 if (inputid != ctxt->input->id) {
4859 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4860 "Comment doesn't start and stop in the same"
4861 " entity\n");
4862 }
4863 NEXT;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4865 (!ctxt->disableSAX))
4866 ctxt->sax->comment(ctxt->userData, buf);
4867 }
4868 xmlFree(buf);
4869 return;
4870 not_terminated:
4871 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872 "Comment not terminated\n", NULL);
4873 xmlFree(buf);
4874 return;
4875 }
4876
4877 /**
4878 * xmlParseComment:
4879 * @ctxt: an XML parser context
4880 *
4881 * Skip an XML (SGML) comment <!-- .... -->
4882 * The spec says that "For compatibility, the string "--" (double-hyphen)
4883 * must not occur within comments. "
4884 *
4885 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4886 */
4887 void
xmlParseComment(xmlParserCtxtPtr ctxt)4888 xmlParseComment(xmlParserCtxtPtr ctxt) {
4889 xmlChar *buf = NULL;
4890 size_t size = XML_PARSER_BUFFER_SIZE;
4891 size_t len = 0;
4892 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4893 XML_MAX_HUGE_LENGTH :
4894 XML_MAX_TEXT_LENGTH;
4895 xmlParserInputState state;
4896 const xmlChar *in;
4897 size_t nbchar = 0;
4898 int ccol;
4899 int inputid;
4900
4901 /*
4902 * Check that there is a comment right here.
4903 */
4904 if ((RAW != '<') || (NXT(1) != '!') ||
4905 (NXT(2) != '-') || (NXT(3) != '-')) return;
4906 state = ctxt->instate;
4907 ctxt->instate = XML_PARSER_COMMENT;
4908 inputid = ctxt->input->id;
4909 SKIP(4);
4910 SHRINK;
4911 GROW;
4912
4913 /*
4914 * Accelerated common case where input don't need to be
4915 * modified before passing it to the handler.
4916 */
4917 in = ctxt->input->cur;
4918 do {
4919 if (*in == 0xA) {
4920 do {
4921 ctxt->input->line++; ctxt->input->col = 1;
4922 in++;
4923 } while (*in == 0xA);
4924 }
4925 get_more:
4926 ccol = ctxt->input->col;
4927 while (((*in > '-') && (*in <= 0x7F)) ||
4928 ((*in >= 0x20) && (*in < '-')) ||
4929 (*in == 0x09)) {
4930 in++;
4931 ccol++;
4932 }
4933 ctxt->input->col = ccol;
4934 if (*in == 0xA) {
4935 do {
4936 ctxt->input->line++; ctxt->input->col = 1;
4937 in++;
4938 } while (*in == 0xA);
4939 goto get_more;
4940 }
4941 nbchar = in - ctxt->input->cur;
4942 /*
4943 * save current set of data
4944 */
4945 if (nbchar > 0) {
4946 if ((ctxt->sax != NULL) &&
4947 (ctxt->sax->comment != NULL)) {
4948 if (buf == NULL) {
4949 if ((*in == '-') && (in[1] == '-'))
4950 size = nbchar + 1;
4951 else
4952 size = XML_PARSER_BUFFER_SIZE + nbchar;
4953 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4954 if (buf == NULL) {
4955 xmlErrMemory(ctxt, NULL);
4956 ctxt->instate = state;
4957 return;
4958 }
4959 len = 0;
4960 } else if (len + nbchar + 1 >= size) {
4961 xmlChar *new_buf;
4962 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4963 new_buf = (xmlChar *) xmlRealloc(buf,
4964 size * sizeof(xmlChar));
4965 if (new_buf == NULL) {
4966 xmlFree (buf);
4967 xmlErrMemory(ctxt, NULL);
4968 ctxt->instate = state;
4969 return;
4970 }
4971 buf = new_buf;
4972 }
4973 memcpy(&buf[len], ctxt->input->cur, nbchar);
4974 len += nbchar;
4975 buf[len] = 0;
4976 }
4977 }
4978 if (len > maxLength) {
4979 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4980 "Comment too big found", NULL);
4981 xmlFree (buf);
4982 return;
4983 }
4984 ctxt->input->cur = in;
4985 if (*in == 0xA) {
4986 in++;
4987 ctxt->input->line++; ctxt->input->col = 1;
4988 }
4989 if (*in == 0xD) {
4990 in++;
4991 if (*in == 0xA) {
4992 ctxt->input->cur = in;
4993 in++;
4994 ctxt->input->line++; ctxt->input->col = 1;
4995 goto get_more;
4996 }
4997 in--;
4998 }
4999 SHRINK;
5000 GROW;
5001 if (ctxt->instate == XML_PARSER_EOF) {
5002 xmlFree(buf);
5003 return;
5004 }
5005 in = ctxt->input->cur;
5006 if (*in == '-') {
5007 if (in[1] == '-') {
5008 if (in[2] == '>') {
5009 if (ctxt->input->id != inputid) {
5010 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5011 "comment doesn't start and stop in the"
5012 " same entity\n");
5013 }
5014 SKIP(3);
5015 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5016 (!ctxt->disableSAX)) {
5017 if (buf != NULL)
5018 ctxt->sax->comment(ctxt->userData, buf);
5019 else
5020 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5021 }
5022 if (buf != NULL)
5023 xmlFree(buf);
5024 if (ctxt->instate != XML_PARSER_EOF)
5025 ctxt->instate = state;
5026 return;
5027 }
5028 if (buf != NULL) {
5029 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5030 "Double hyphen within comment: "
5031 "<!--%.50s\n",
5032 buf);
5033 } else
5034 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5035 "Double hyphen within comment\n", NULL);
5036 if (ctxt->instate == XML_PARSER_EOF) {
5037 xmlFree(buf);
5038 return;
5039 }
5040 in++;
5041 ctxt->input->col++;
5042 }
5043 in++;
5044 ctxt->input->col++;
5045 goto get_more;
5046 }
5047 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5048 xmlParseCommentComplex(ctxt, buf, len, size);
5049 ctxt->instate = state;
5050 return;
5051 }
5052
5053
5054 /**
5055 * xmlParsePITarget:
5056 * @ctxt: an XML parser context
5057 *
5058 * parse the name of a PI
5059 *
5060 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5061 *
5062 * Returns the PITarget name or NULL
5063 */
5064
5065 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5066 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5067 const xmlChar *name;
5068
5069 name = xmlParseName(ctxt);
5070 if ((name != NULL) &&
5071 ((name[0] == 'x') || (name[0] == 'X')) &&
5072 ((name[1] == 'm') || (name[1] == 'M')) &&
5073 ((name[2] == 'l') || (name[2] == 'L'))) {
5074 int i;
5075 if ((name[0] == 'x') && (name[1] == 'm') &&
5076 (name[2] == 'l') && (name[3] == 0)) {
5077 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 "XML declaration allowed only at the start of the document\n");
5079 return(name);
5080 } else if (name[3] == 0) {
5081 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5082 return(name);
5083 }
5084 for (i = 0;;i++) {
5085 if (xmlW3CPIs[i] == NULL) break;
5086 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5087 return(name);
5088 }
5089 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5090 "xmlParsePITarget: invalid name prefix 'xml'\n",
5091 NULL, NULL);
5092 }
5093 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5094 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5095 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5096 }
5097 return(name);
5098 }
5099
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form catalog = "URL" or catalog = 'URL', with
 * optional blanks around each token and nothing after the closing
 * quote. A malformed value produces an XML_WAR_CATALOG_PI warning,
 * except for a missing '=' after the keyword which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the opening quote, either kind */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5161
5162 /**
5163 * xmlParsePI:
5164 * @ctxt: an XML parser context
5165 *
5166 * parse an XML Processing Instruction.
5167 *
5168 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5169 *
5170 * The processing is transferred to SAX once parsed.
5171 */
5172
5173 void
xmlParsePI(xmlParserCtxtPtr ctxt)5174 xmlParsePI(xmlParserCtxtPtr ctxt) {
5175 xmlChar *buf = NULL;
5176 size_t len = 0;
5177 size_t size = XML_PARSER_BUFFER_SIZE;
5178 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5179 XML_MAX_HUGE_LENGTH :
5180 XML_MAX_TEXT_LENGTH;
5181 int cur, l;
5182 const xmlChar *target;
5183 xmlParserInputState state;
5184 int count = 0;
5185
5186 if ((RAW == '<') && (NXT(1) == '?')) {
5187 int inputid = ctxt->input->id;
5188 state = ctxt->instate;
5189 ctxt->instate = XML_PARSER_PI;
5190 /*
5191 * this is a Processing Instruction.
5192 */
5193 SKIP(2);
5194 SHRINK;
5195
5196 /*
5197 * Parse the target name and check for special support like
5198 * namespace.
5199 */
5200 target = xmlParsePITarget(ctxt);
5201 if (target != NULL) {
5202 if ((RAW == '?') && (NXT(1) == '>')) {
5203 if (inputid != ctxt->input->id) {
5204 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5205 "PI declaration doesn't start and stop in"
5206 " the same entity\n");
5207 }
5208 SKIP(2);
5209
5210 /*
5211 * SAX: PI detected.
5212 */
5213 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5214 (ctxt->sax->processingInstruction != NULL))
5215 ctxt->sax->processingInstruction(ctxt->userData,
5216 target, NULL);
5217 if (ctxt->instate != XML_PARSER_EOF)
5218 ctxt->instate = state;
5219 return;
5220 }
5221 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5222 if (buf == NULL) {
5223 xmlErrMemory(ctxt, NULL);
5224 ctxt->instate = state;
5225 return;
5226 }
5227 if (SKIP_BLANKS == 0) {
5228 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5229 "ParsePI: PI %s space expected\n", target);
5230 }
5231 cur = CUR_CHAR(l);
5232 while (IS_CHAR(cur) && /* checked */
5233 ((cur != '?') || (NXT(1) != '>'))) {
5234 if (len + 5 >= size) {
5235 xmlChar *tmp;
5236 size_t new_size = size * 2;
5237 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5238 if (tmp == NULL) {
5239 xmlErrMemory(ctxt, NULL);
5240 xmlFree(buf);
5241 ctxt->instate = state;
5242 return;
5243 }
5244 buf = tmp;
5245 size = new_size;
5246 }
5247 count++;
5248 if (count > 50) {
5249 SHRINK;
5250 GROW;
5251 if (ctxt->instate == XML_PARSER_EOF) {
5252 xmlFree(buf);
5253 return;
5254 }
5255 count = 0;
5256 }
5257 COPY_BUF(l,buf,len,cur);
5258 NEXTL(l);
5259 cur = CUR_CHAR(l);
5260 if (cur == 0) {
5261 SHRINK;
5262 GROW;
5263 cur = CUR_CHAR(l);
5264 }
5265 if (len > maxLength) {
5266 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5267 "PI %s too big found", target);
5268 xmlFree(buf);
5269 ctxt->instate = state;
5270 return;
5271 }
5272 }
5273 buf[len] = 0;
5274 if (cur != '?') {
5275 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5276 "ParsePI: PI %s never end ...\n", target);
5277 } else {
5278 if (inputid != ctxt->input->id) {
5279 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5280 "PI declaration doesn't start and stop in"
5281 " the same entity\n");
5282 }
5283 SKIP(2);
5284
5285 #ifdef LIBXML_CATALOG_ENABLED
5286 if (((state == XML_PARSER_MISC) ||
5287 (state == XML_PARSER_START)) &&
5288 (xmlStrEqual(target, XML_CATALOG_PI))) {
5289 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5290 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5291 (allow == XML_CATA_ALLOW_ALL))
5292 xmlParseCatalogPI(ctxt, buf);
5293 }
5294 #endif
5295
5296
5297 /*
5298 * SAX: PI detected.
5299 */
5300 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5301 (ctxt->sax->processingInstruction != NULL))
5302 ctxt->sax->processingInstruction(ctxt->userData,
5303 target, buf);
5304 }
5305 xmlFree(buf);
5306 } else {
5307 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5308 }
5309 if (ctxt->instate != XML_PARSER_EOF)
5310 ctxt->instate = state;
5311 }
5312 }
5313
5314 /**
5315 * xmlParseNotationDecl:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse a notation declaration
5319 *
5320 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5321 *
5322 * Hence there is actually 3 choices:
5323 * 'PUBLIC' S PubidLiteral
5324 * 'PUBLIC' S PubidLiteral S SystemLiteral
5325 * and 'SYSTEM' S SystemLiteral
5326 *
5327 * See the NOTE on xmlParseExternalID().
5328 */
5329
5330 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5331 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5332 const xmlChar *name;
5333 xmlChar *Pubid;
5334 xmlChar *Systemid;
5335
5336 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5337 int inputid = ctxt->input->id;
5338 SHRINK;
5339 SKIP(10);
5340 if (SKIP_BLANKS == 0) {
5341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342 "Space required after '<!NOTATION'\n");
5343 return;
5344 }
5345
5346 name = xmlParseName(ctxt);
5347 if (name == NULL) {
5348 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5349 return;
5350 }
5351 if (xmlStrchr(name, ':') != NULL) {
5352 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5353 "colons are forbidden from notation names '%s'\n",
5354 name, NULL, NULL);
5355 }
5356 if (SKIP_BLANKS == 0) {
5357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358 "Space required after the NOTATION name'\n");
5359 return;
5360 }
5361
5362 /*
5363 * Parse the IDs.
5364 */
5365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5366 SKIP_BLANKS;
5367
5368 if (RAW == '>') {
5369 if (inputid != ctxt->input->id) {
5370 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5371 "Notation declaration doesn't start and stop"
5372 " in the same entity\n");
5373 }
5374 NEXT;
5375 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376 (ctxt->sax->notationDecl != NULL))
5377 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378 } else {
5379 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380 }
5381 if (Systemid != NULL) xmlFree(Systemid);
5382 if (Pubid != NULL) xmlFree(Pubid);
5383 }
5384 }
5385
5386 /**
5387 * xmlParseEntityDecl:
5388 * @ctxt: an XML parser context
5389 *
5390 * parse <!ENTITY declarations
5391 *
5392 * [70] EntityDecl ::= GEDecl | PEDecl
5393 *
5394 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5395 *
5396 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5397 *
5398 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5399 *
5400 * [74] PEDef ::= EntityValue | ExternalID
5401 *
5402 * [76] NDataDecl ::= S 'NDATA' S Name
5403 *
5404 * [ VC: Notation Declared ]
5405 * The Name must match the declared name of a notation.
5406 */
5407
5408 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5409 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5410 const xmlChar *name = NULL;
5411 xmlChar *value = NULL;
5412 xmlChar *URI = NULL, *literal = NULL;
5413 const xmlChar *ndata = NULL;
5414 int isParameter = 0;
5415 xmlChar *orig = NULL;
5416
5417 /* GROW; done in the caller */
5418 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5419 int inputid = ctxt->input->id;
5420 SHRINK;
5421 SKIP(8);
5422 if (SKIP_BLANKS == 0) {
5423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5424 "Space required after '<!ENTITY'\n");
5425 }
5426
5427 if (RAW == '%') {
5428 NEXT;
5429 if (SKIP_BLANKS == 0) {
5430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431 "Space required after '%%'\n");
5432 }
5433 isParameter = 1;
5434 }
5435
5436 name = xmlParseName(ctxt);
5437 if (name == NULL) {
5438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5439 "xmlParseEntityDecl: no name\n");
5440 return;
5441 }
5442 if (xmlStrchr(name, ':') != NULL) {
5443 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5444 "colons are forbidden from entities names '%s'\n",
5445 name, NULL, NULL);
5446 }
5447 if (SKIP_BLANKS == 0) {
5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the entity name\n");
5450 }
5451
5452 ctxt->instate = XML_PARSER_ENTITY_DECL;
5453 /*
5454 * handle the various case of definitions...
5455 */
5456 if (isParameter) {
5457 if ((RAW == '"') || (RAW == '\'')) {
5458 value = xmlParseEntityValue(ctxt, &orig);
5459 if (value) {
5460 if ((ctxt->sax != NULL) &&
5461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462 ctxt->sax->entityDecl(ctxt->userData, name,
5463 XML_INTERNAL_PARAMETER_ENTITY,
5464 NULL, NULL, value);
5465 }
5466 } else {
5467 URI = xmlParseExternalID(ctxt, &literal, 1);
5468 if ((URI == NULL) && (literal == NULL)) {
5469 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5470 }
5471 if (URI) {
5472 xmlURIPtr uri;
5473
5474 uri = xmlParseURI((const char *) URI);
5475 if (uri == NULL) {
5476 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477 "Invalid URI: %s\n", URI);
5478 /*
5479 * This really ought to be a well formedness error
5480 * but the XML Core WG decided otherwise c.f. issue
5481 * E26 of the XML erratas.
5482 */
5483 } else {
5484 if (uri->fragment != NULL) {
5485 /*
5486 * Okay this is foolish to block those but not
5487 * invalid URIs.
5488 */
5489 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5490 } else {
5491 if ((ctxt->sax != NULL) &&
5492 (!ctxt->disableSAX) &&
5493 (ctxt->sax->entityDecl != NULL))
5494 ctxt->sax->entityDecl(ctxt->userData, name,
5495 XML_EXTERNAL_PARAMETER_ENTITY,
5496 literal, URI, NULL);
5497 }
5498 xmlFreeURI(uri);
5499 }
5500 }
5501 }
5502 } else {
5503 if ((RAW == '"') || (RAW == '\'')) {
5504 value = xmlParseEntityValue(ctxt, &orig);
5505 if ((ctxt->sax != NULL) &&
5506 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507 ctxt->sax->entityDecl(ctxt->userData, name,
5508 XML_INTERNAL_GENERAL_ENTITY,
5509 NULL, NULL, value);
5510 /*
5511 * For expat compatibility in SAX mode.
5512 */
5513 if ((ctxt->myDoc == NULL) ||
5514 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515 if (ctxt->myDoc == NULL) {
5516 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5517 if (ctxt->myDoc == NULL) {
5518 xmlErrMemory(ctxt, "New Doc failed");
5519 return;
5520 }
5521 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5522 }
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5526
5527 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
5529 }
5530 } else {
5531 URI = xmlParseExternalID(ctxt, &literal, 1);
5532 if ((URI == NULL) && (literal == NULL)) {
5533 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5534 }
5535 if (URI) {
5536 xmlURIPtr uri;
5537
5538 uri = xmlParseURI((const char *)URI);
5539 if (uri == NULL) {
5540 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541 "Invalid URI: %s\n", URI);
5542 /*
5543 * This really ought to be a well formedness error
5544 * but the XML Core WG decided otherwise c.f. issue
5545 * E26 of the XML erratas.
5546 */
5547 } else {
5548 if (uri->fragment != NULL) {
5549 /*
5550 * Okay this is foolish to block those but not
5551 * invalid URIs.
5552 */
5553 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5554 }
5555 xmlFreeURI(uri);
5556 }
5557 }
5558 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required before 'NDATA'\n");
5561 }
5562 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5563 SKIP(5);
5564 if (SKIP_BLANKS == 0) {
5565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 "Space required after 'NDATA'\n");
5567 }
5568 ndata = xmlParseName(ctxt);
5569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5570 (ctxt->sax->unparsedEntityDecl != NULL))
5571 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5572 literal, URI, ndata);
5573 } else {
5574 if ((ctxt->sax != NULL) &&
5575 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5576 ctxt->sax->entityDecl(ctxt->userData, name,
5577 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5578 literal, URI, NULL);
5579 /*
5580 * For expat compatibility in SAX mode.
5581 * assuming the entity replacement was asked for
5582 */
5583 if ((ctxt->replaceEntities != 0) &&
5584 ((ctxt->myDoc == NULL) ||
5585 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5586 if (ctxt->myDoc == NULL) {
5587 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588 if (ctxt->myDoc == NULL) {
5589 xmlErrMemory(ctxt, "New Doc failed");
5590 return;
5591 }
5592 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593 }
5594
5595 if (ctxt->myDoc->intSubset == NULL)
5596 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597 BAD_CAST "fake", NULL, NULL);
5598 xmlSAX2EntityDecl(ctxt, name,
5599 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5600 literal, URI, NULL);
5601 }
5602 }
5603 }
5604 }
5605 if (ctxt->instate == XML_PARSER_EOF)
5606 goto done;
5607 SKIP_BLANKS;
5608 if (RAW != '>') {
5609 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5610 "xmlParseEntityDecl: entity %s not terminated\n", name);
5611 xmlHaltParser(ctxt);
5612 } else {
5613 if (inputid != ctxt->input->id) {
5614 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5615 "Entity declaration doesn't start and stop in"
5616 " the same entity\n");
5617 }
5618 NEXT;
5619 }
5620 if (orig != NULL) {
5621 /*
5622 * Ugly mechanism to save the raw entity value.
5623 */
5624 xmlEntityPtr cur = NULL;
5625
5626 if (isParameter) {
5627 if ((ctxt->sax != NULL) &&
5628 (ctxt->sax->getParameterEntity != NULL))
5629 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630 } else {
5631 if ((ctxt->sax != NULL) &&
5632 (ctxt->sax->getEntity != NULL))
5633 cur = ctxt->sax->getEntity(ctxt->userData, name);
5634 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5635 cur = xmlSAX2GetEntity(ctxt, name);
5636 }
5637 }
5638 if ((cur != NULL) && (cur->orig == NULL)) {
5639 cur->orig = orig;
5640 orig = NULL;
5641 }
5642 }
5643
5644 done:
5645 if (value != NULL) xmlFree(value);
5646 if (URI != NULL) xmlFree(URI);
5647 if (literal != NULL) xmlFree(literal);
5648 if (orig != NULL) xmlFree(orig);
5649 }
5650 }
5651
5652 /**
5653 * xmlParseDefaultDecl:
5654 * @ctxt: an XML parser context
5655 * @value: Receive a possible fixed default value for the attribute
5656 *
5657 * Parse an attribute default declaration
5658 *
5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5660 *
5661 * [ VC: Required Attribute ]
5662 * if the default declaration is the keyword #REQUIRED, then the
5663 * attribute must be specified for all elements of the type in the
5664 * attribute-list declaration.
5665 *
5666 * [ VC: Attribute Default Legal ]
5667 * The declared default value must meet the lexical constraints of
5668 * the declared attribute type c.f. xmlValidateAttributeDecl()
5669 *
5670 * [ VC: Fixed Attribute Default ]
5671 * if an attribute has a default value declared with the #FIXED
5672 * keyword, instances of that attribute must match the default value.
5673 *
5674 * [ WFC: No < in Attribute Values ]
5675 * handled in xmlParseAttValue()
5676 *
5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5678 * or XML_ATTRIBUTE_FIXED.
5679 */
5680
5681 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5682 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683 int val;
5684 xmlChar *ret;
5685
5686 *value = NULL;
5687 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5688 SKIP(9);
5689 return(XML_ATTRIBUTE_REQUIRED);
5690 }
5691 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5692 SKIP(8);
5693 return(XML_ATTRIBUTE_IMPLIED);
5694 }
5695 val = XML_ATTRIBUTE_NONE;
5696 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5697 SKIP(6);
5698 val = XML_ATTRIBUTE_FIXED;
5699 if (SKIP_BLANKS == 0) {
5700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required after '#FIXED'\n");
5702 }
5703 }
5704 ret = xmlParseAttValue(ctxt);
5705 ctxt->instate = XML_PARSER_DTD;
5706 if (ret == NULL) {
5707 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5708 "Attribute default value declaration error\n");
5709 } else
5710 *value = ret;
5711 return(val);
5712 }
5713
5714 /**
5715 * xmlParseNotationType:
5716 * @ctxt: an XML parser context
5717 *
5718 * parse an Notation attribute type.
5719 *
5720 * Note: the leading 'NOTATION' S part has already being parsed...
5721 *
5722 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5723 *
5724 * [ VC: Notation Attributes ]
5725 * Values of this type must match one of the notation names included
5726 * in the declaration; all notation names in the declaration must be declared.
5727 *
5728 * Returns: the notation attribute tree built while parsing
5729 */
5730
5731 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5732 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5733 const xmlChar *name;
5734 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735
5736 if (RAW != '(') {
5737 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738 return(NULL);
5739 }
5740 SHRINK;
5741 do {
5742 NEXT;
5743 SKIP_BLANKS;
5744 name = xmlParseName(ctxt);
5745 if (name == NULL) {
5746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "Name expected in NOTATION declaration\n");
5748 xmlFreeEnumeration(ret);
5749 return(NULL);
5750 }
5751 tmp = ret;
5752 while (tmp != NULL) {
5753 if (xmlStrEqual(name, tmp->name)) {
5754 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5755 "standalone: attribute notation value token %s duplicated\n",
5756 name, NULL);
5757 if (!xmlDictOwns(ctxt->dict, name))
5758 xmlFree((xmlChar *) name);
5759 break;
5760 }
5761 tmp = tmp->next;
5762 }
5763 if (tmp == NULL) {
5764 cur = xmlCreateEnumeration(name);
5765 if (cur == NULL) {
5766 xmlFreeEnumeration(ret);
5767 return(NULL);
5768 }
5769 if (last == NULL) ret = last = cur;
5770 else {
5771 last->next = cur;
5772 last = cur;
5773 }
5774 }
5775 SKIP_BLANKS;
5776 } while (RAW == '|');
5777 if (RAW != ')') {
5778 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5779 xmlFreeEnumeration(ret);
5780 return(NULL);
5781 }
5782 NEXT;
5783 return(ret);
5784 }
5785
5786 /**
5787 * xmlParseEnumerationType:
5788 * @ctxt: an XML parser context
5789 *
5790 * parse an Enumeration attribute type.
5791 *
5792 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5793 *
5794 * [ VC: Enumeration ]
5795 * Values of this type must match one of the Nmtoken tokens in
5796 * the declaration
5797 *
5798 * Returns: the enumeration attribute tree built while parsing
5799 */
5800
5801 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5802 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5803 xmlChar *name;
5804 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5805
5806 if (RAW != '(') {
5807 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808 return(NULL);
5809 }
5810 SHRINK;
5811 do {
5812 NEXT;
5813 SKIP_BLANKS;
5814 name = xmlParseNmtoken(ctxt);
5815 if (name == NULL) {
5816 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5817 return(ret);
5818 }
5819 tmp = ret;
5820 while (tmp != NULL) {
5821 if (xmlStrEqual(name, tmp->name)) {
5822 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5823 "standalone: attribute enumeration value token %s duplicated\n",
5824 name, NULL);
5825 if (!xmlDictOwns(ctxt->dict, name))
5826 xmlFree(name);
5827 break;
5828 }
5829 tmp = tmp->next;
5830 }
5831 if (tmp == NULL) {
5832 cur = xmlCreateEnumeration(name);
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree(name);
5835 if (cur == NULL) {
5836 xmlFreeEnumeration(ret);
5837 return(NULL);
5838 }
5839 if (last == NULL) ret = last = cur;
5840 else {
5841 last->next = cur;
5842 last = cur;
5843 }
5844 }
5845 SKIP_BLANKS;
5846 } while (RAW == '|');
5847 if (RAW != ')') {
5848 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5849 return(ret);
5850 }
5851 NEXT;
5852 return(ret);
5853 }
5854
5855 /**
5856 * xmlParseEnumeratedType:
5857 * @ctxt: an XML parser context
5858 * @tree: the enumeration tree built while parsing
5859 *
5860 * parse an Enumerated attribute type.
5861 *
5862 * [57] EnumeratedType ::= NotationType | Enumeration
5863 *
5864 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5865 *
5866 *
5867 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5868 */
5869
5870 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5871 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5872 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5873 SKIP(8);
5874 if (SKIP_BLANKS == 0) {
5875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5876 "Space required after 'NOTATION'\n");
5877 return(0);
5878 }
5879 *tree = xmlParseNotationType(ctxt);
5880 if (*tree == NULL) return(0);
5881 return(XML_ATTRIBUTE_NOTATION);
5882 }
5883 *tree = xmlParseEnumerationType(ctxt);
5884 if (*tree == NULL) return(0);
5885 return(XML_ATTRIBUTE_ENUMERATION);
5886 }
5887
5888 /**
5889 * xmlParseAttributeType:
5890 * @ctxt: an XML parser context
5891 * @tree: the enumeration tree built while parsing
5892 *
5893 * parse the Attribute list def for an element
5894 *
5895 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5896 *
5897 * [55] StringType ::= 'CDATA'
5898 *
5899 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5901 *
5902 * Validity constraints for attribute values syntax are checked in
5903 * xmlValidateAttributeValue()
5904 *
5905 * [ VC: ID ]
5906 * Values of type ID must match the Name production. A name must not
5907 * appear more than once in an XML document as a value of this type;
5908 * i.e., ID values must uniquely identify the elements which bear them.
5909 *
5910 * [ VC: One ID per Element Type ]
5911 * No element type may have more than one ID attribute specified.
5912 *
5913 * [ VC: ID Attribute Default ]
5914 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5915 *
5916 * [ VC: IDREF ]
5917 * Values of type IDREF must match the Name production, and values
5918 * of type IDREFS must match Names; each IDREF Name must match the value
5919 * of an ID attribute on some element in the XML document; i.e. IDREF
5920 * values must match the value of some ID attribute.
5921 *
5922 * [ VC: Entity Name ]
5923 * Values of type ENTITY must match the Name production, values
5924 * of type ENTITIES must match Names; each Entity Name must match the
5925 * name of an unparsed entity declared in the DTD.
5926 *
5927 * [ VC: Name Token ]
5928 * Values of type NMTOKEN must match the Nmtoken production; values
5929 * of type NMTOKENS must match Nmtokens.
5930 *
5931 * Returns the attribute type
5932 */
5933 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5934 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935 SHRINK;
5936 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5937 SKIP(5);
5938 return(XML_ATTRIBUTE_CDATA);
5939 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5940 SKIP(6);
5941 return(XML_ATTRIBUTE_IDREFS);
5942 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5943 SKIP(5);
5944 return(XML_ATTRIBUTE_IDREF);
5945 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946 SKIP(2);
5947 return(XML_ATTRIBUTE_ID);
5948 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5949 SKIP(6);
5950 return(XML_ATTRIBUTE_ENTITY);
5951 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5952 SKIP(8);
5953 return(XML_ATTRIBUTE_ENTITIES);
5954 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5955 SKIP(8);
5956 return(XML_ATTRIBUTE_NMTOKENS);
5957 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5958 SKIP(7);
5959 return(XML_ATTRIBUTE_NMTOKEN);
5960 }
5961 return(xmlParseEnumeratedType(ctxt, tree));
5962 }
5963
5964 /**
5965 * xmlParseAttributeListDecl:
5966 * @ctxt: an XML parser context
5967 *
5968 * : parse the Attribute list def for an element
5969 *
5970 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5971 *
5972 * [53] AttDef ::= S Name S AttType S DefaultDecl
5973 *
5974 */
5975 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5976 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5977 const xmlChar *elemName;
5978 const xmlChar *attrName;
5979 xmlEnumerationPtr tree;
5980
5981 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5982 int inputid = ctxt->input->id;
5983
5984 SKIP(9);
5985 if (SKIP_BLANKS == 0) {
5986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5987 "Space required after '<!ATTLIST'\n");
5988 }
5989 elemName = xmlParseName(ctxt);
5990 if (elemName == NULL) {
5991 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992 "ATTLIST: no name for Element\n");
5993 return;
5994 }
5995 SKIP_BLANKS;
5996 GROW;
5997 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5998 int type;
5999 int def;
6000 xmlChar *defaultValue = NULL;
6001
6002 GROW;
6003 tree = NULL;
6004 attrName = xmlParseName(ctxt);
6005 if (attrName == NULL) {
6006 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007 "ATTLIST: no name for Attribute\n");
6008 break;
6009 }
6010 GROW;
6011 if (SKIP_BLANKS == 0) {
6012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013 "Space required after the attribute name\n");
6014 break;
6015 }
6016
6017 type = xmlParseAttributeType(ctxt, &tree);
6018 if (type <= 0) {
6019 break;
6020 }
6021
6022 GROW;
6023 if (SKIP_BLANKS == 0) {
6024 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025 "Space required after the attribute type\n");
6026 if (tree != NULL)
6027 xmlFreeEnumeration(tree);
6028 break;
6029 }
6030
6031 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032 if (def <= 0) {
6033 if (defaultValue != NULL)
6034 xmlFree(defaultValue);
6035 if (tree != NULL)
6036 xmlFreeEnumeration(tree);
6037 break;
6038 }
6039 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6041
6042 GROW;
6043 if (RAW != '>') {
6044 if (SKIP_BLANKS == 0) {
6045 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6046 "Space required after the attribute default value\n");
6047 if (defaultValue != NULL)
6048 xmlFree(defaultValue);
6049 if (tree != NULL)
6050 xmlFreeEnumeration(tree);
6051 break;
6052 }
6053 }
6054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055 (ctxt->sax->attributeDecl != NULL))
6056 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057 type, def, defaultValue, tree);
6058 else if (tree != NULL)
6059 xmlFreeEnumeration(tree);
6060
6061 if ((ctxt->sax2) && (defaultValue != NULL) &&
6062 (def != XML_ATTRIBUTE_IMPLIED) &&
6063 (def != XML_ATTRIBUTE_REQUIRED)) {
6064 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6065 }
6066 if (ctxt->sax2) {
6067 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6068 }
6069 if (defaultValue != NULL)
6070 xmlFree(defaultValue);
6071 GROW;
6072 }
6073 if (RAW == '>') {
6074 if (inputid != ctxt->input->id) {
6075 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6076 "Attribute list declaration doesn't start and"
6077 " stop in the same entity\n");
6078 }
6079 NEXT;
6080 }
6081 }
6082 }
6083
6084 /**
6085 * xmlParseElementMixedContentDecl:
6086 * @ctxt: an XML parser context
6087 * @inputchk: the input used for the current entity, needed for boundary checks
6088 *
6089 * parse the declaration for a Mixed Element content
6090 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6091 *
6092 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093 * '(' S? '#PCDATA' S? ')'
6094 *
6095 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6096 *
6097 * [ VC: No Duplicate Types ]
6098 * The same name must not appear more than once in a single
6099 * mixed-content declaration.
6100 *
6101 * returns: the list of the xmlElementContentPtr describing the element choices
6102 */
6103 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6104 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6105 xmlElementContentPtr ret = NULL, cur = NULL, n;
6106 const xmlChar *elem = NULL;
6107
6108 GROW;
6109 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6110 SKIP(7);
6111 SKIP_BLANKS;
6112 SHRINK;
6113 if (RAW == ')') {
6114 if (ctxt->input->id != inputchk) {
6115 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6116 "Element content declaration doesn't start and"
6117 " stop in the same entity\n");
6118 }
6119 NEXT;
6120 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121 if (ret == NULL)
6122 return(NULL);
6123 if (RAW == '*') {
6124 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 NEXT;
6126 }
6127 return(ret);
6128 }
6129 if ((RAW == '(') || (RAW == '|')) {
6130 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131 if (ret == NULL) return(NULL);
6132 }
6133 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6134 NEXT;
6135 if (elem == NULL) {
6136 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137 if (ret == NULL) {
6138 xmlFreeDocElementContent(ctxt->myDoc, cur);
6139 return(NULL);
6140 }
6141 ret->c1 = cur;
6142 if (cur != NULL)
6143 cur->parent = ret;
6144 cur = ret;
6145 } else {
6146 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147 if (n == NULL) {
6148 xmlFreeDocElementContent(ctxt->myDoc, ret);
6149 return(NULL);
6150 }
6151 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6152 if (n->c1 != NULL)
6153 n->c1->parent = n;
6154 cur->c2 = n;
6155 if (n != NULL)
6156 n->parent = cur;
6157 cur = n;
6158 }
6159 SKIP_BLANKS;
6160 elem = xmlParseName(ctxt);
6161 if (elem == NULL) {
6162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6163 "xmlParseElementMixedContentDecl : Name expected\n");
6164 xmlFreeDocElementContent(ctxt->myDoc, ret);
6165 return(NULL);
6166 }
6167 SKIP_BLANKS;
6168 GROW;
6169 }
6170 if ((RAW == ')') && (NXT(1) == '*')) {
6171 if (elem != NULL) {
6172 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6173 XML_ELEMENT_CONTENT_ELEMENT);
6174 if (cur->c2 != NULL)
6175 cur->c2->parent = cur;
6176 }
6177 if (ret != NULL)
6178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 if (ctxt->input->id != inputchk) {
6180 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181 "Element content declaration doesn't start and"
6182 " stop in the same entity\n");
6183 }
6184 SKIP(2);
6185 } else {
6186 xmlFreeDocElementContent(ctxt->myDoc, ret);
6187 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6188 return(NULL);
6189 }
6190
6191 } else {
6192 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6193 }
6194 return(ret);
6195 }
6196
6197 /**
6198 * xmlParseElementChildrenContentDeclPriv:
6199 * @ctxt: an XML parser context
6200 * @inputchk: the input used for the current entity, needed for boundary checks
6201 * @depth: the level of recursion
6202 *
6203 * parse the declaration for a Mixed Element content
6204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6205 *
6206 *
6207 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6208 *
6209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6210 *
6211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6212 *
6213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6214 *
6215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216 * TODO Parameter-entity replacement text must be properly nested
6217 * with parenthesized groups. That is to say, if either of the
6218 * opening or closing parentheses in a choice, seq, or Mixed
6219 * construct is contained in the replacement text for a parameter
6220 * entity, both must be contained in the same replacement text. For
6221 * interoperability, if a parameter-entity reference appears in a
6222 * choice, seq, or Mixed construct, its replacement text should not
6223 * be empty, and neither the first nor last non-blank character of
6224 * the replacement text should be a connector (| or ,).
6225 *
6226 * Returns the tree of xmlElementContentPtr describing the element
6227 * hierarchy.
6228 */
6229 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6230 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6231 int depth) {
6232 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6233 const xmlChar *elem;
6234 xmlChar type = 0;
6235
6236 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6237 (depth > 2048)) {
6238 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6239 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6240 depth);
6241 return(NULL);
6242 }
6243 SKIP_BLANKS;
6244 GROW;
6245 if (RAW == '(') {
6246 int inputid = ctxt->input->id;
6247
6248 /* Recurse on first child */
6249 NEXT;
6250 SKIP_BLANKS;
6251 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6252 depth + 1);
6253 if (cur == NULL)
6254 return(NULL);
6255 SKIP_BLANKS;
6256 GROW;
6257 } else {
6258 elem = xmlParseName(ctxt);
6259 if (elem == NULL) {
6260 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6261 return(NULL);
6262 }
6263 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6264 if (cur == NULL) {
6265 xmlErrMemory(ctxt, NULL);
6266 return(NULL);
6267 }
6268 GROW;
6269 if (RAW == '?') {
6270 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6271 NEXT;
6272 } else if (RAW == '*') {
6273 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6274 NEXT;
6275 } else if (RAW == '+') {
6276 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6277 NEXT;
6278 } else {
6279 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6280 }
6281 GROW;
6282 }
6283 SKIP_BLANKS;
6284 SHRINK;
6285 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6286 /*
6287 * Each loop we parse one separator and one element.
6288 */
6289 if (RAW == ',') {
6290 if (type == 0) type = CUR;
6291
6292 /*
6293 * Detect "Name | Name , Name" error
6294 */
6295 else if (type != CUR) {
6296 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6297 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6298 type);
6299 if ((last != NULL) && (last != ret))
6300 xmlFreeDocElementContent(ctxt->myDoc, last);
6301 if (ret != NULL)
6302 xmlFreeDocElementContent(ctxt->myDoc, ret);
6303 return(NULL);
6304 }
6305 NEXT;
6306
6307 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6308 if (op == NULL) {
6309 if ((last != NULL) && (last != ret))
6310 xmlFreeDocElementContent(ctxt->myDoc, last);
6311 xmlFreeDocElementContent(ctxt->myDoc, ret);
6312 return(NULL);
6313 }
6314 if (last == NULL) {
6315 op->c1 = ret;
6316 if (ret != NULL)
6317 ret->parent = op;
6318 ret = cur = op;
6319 } else {
6320 cur->c2 = op;
6321 if (op != NULL)
6322 op->parent = cur;
6323 op->c1 = last;
6324 if (last != NULL)
6325 last->parent = op;
6326 cur =op;
6327 last = NULL;
6328 }
6329 } else if (RAW == '|') {
6330 if (type == 0) type = CUR;
6331
6332 /*
6333 * Detect "Name , Name | Name" error
6334 */
6335 else if (type != CUR) {
6336 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6337 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6338 type);
6339 if ((last != NULL) && (last != ret))
6340 xmlFreeDocElementContent(ctxt->myDoc, last);
6341 if (ret != NULL)
6342 xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 return(NULL);
6344 }
6345 NEXT;
6346
6347 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6348 if (op == NULL) {
6349 if ((last != NULL) && (last != ret))
6350 xmlFreeDocElementContent(ctxt->myDoc, last);
6351 if (ret != NULL)
6352 xmlFreeDocElementContent(ctxt->myDoc, ret);
6353 return(NULL);
6354 }
6355 if (last == NULL) {
6356 op->c1 = ret;
6357 if (ret != NULL)
6358 ret->parent = op;
6359 ret = cur = op;
6360 } else {
6361 cur->c2 = op;
6362 if (op != NULL)
6363 op->parent = cur;
6364 op->c1 = last;
6365 if (last != NULL)
6366 last->parent = op;
6367 cur =op;
6368 last = NULL;
6369 }
6370 } else {
6371 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6372 if ((last != NULL) && (last != ret))
6373 xmlFreeDocElementContent(ctxt->myDoc, last);
6374 if (ret != NULL)
6375 xmlFreeDocElementContent(ctxt->myDoc, ret);
6376 return(NULL);
6377 }
6378 GROW;
6379 SKIP_BLANKS;
6380 GROW;
6381 if (RAW == '(') {
6382 int inputid = ctxt->input->id;
6383 /* Recurse on second child */
6384 NEXT;
6385 SKIP_BLANKS;
6386 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387 depth + 1);
6388 if (last == NULL) {
6389 if (ret != NULL)
6390 xmlFreeDocElementContent(ctxt->myDoc, ret);
6391 return(NULL);
6392 }
6393 SKIP_BLANKS;
6394 } else {
6395 elem = xmlParseName(ctxt);
6396 if (elem == NULL) {
6397 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6398 if (ret != NULL)
6399 xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 return(NULL);
6401 }
6402 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6403 if (last == NULL) {
6404 if (ret != NULL)
6405 xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 return(NULL);
6407 }
6408 if (RAW == '?') {
6409 last->ocur = XML_ELEMENT_CONTENT_OPT;
6410 NEXT;
6411 } else if (RAW == '*') {
6412 last->ocur = XML_ELEMENT_CONTENT_MULT;
6413 NEXT;
6414 } else if (RAW == '+') {
6415 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416 NEXT;
6417 } else {
6418 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6419 }
6420 }
6421 SKIP_BLANKS;
6422 GROW;
6423 }
6424 if ((cur != NULL) && (last != NULL)) {
6425 cur->c2 = last;
6426 if (last != NULL)
6427 last->parent = cur;
6428 }
6429 if (ctxt->input->id != inputchk) {
6430 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431 "Element content declaration doesn't start and stop in"
6432 " the same entity\n");
6433 }
6434 NEXT;
6435 if (RAW == '?') {
6436 if (ret != NULL) {
6437 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440 else
6441 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6442 }
6443 NEXT;
6444 } else if (RAW == '*') {
6445 if (ret != NULL) {
6446 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6447 cur = ret;
6448 /*
6449 * Some normalization:
6450 * (a | b* | c?)* == (a | b | c)*
6451 */
6452 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6453 if ((cur->c1 != NULL) &&
6454 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 if ((cur->c2 != NULL) &&
6458 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 cur = cur->c2;
6462 }
6463 }
6464 NEXT;
6465 } else if (RAW == '+') {
6466 if (ret != NULL) {
6467 int found = 0;
6468
6469 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6472 else
6473 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6474 /*
6475 * Some normalization:
6476 * (a | b*)+ == (a | b)*
6477 * (a | b?)+ == (a | b)*
6478 */
6479 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6480 if ((cur->c1 != NULL) &&
6481 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6485 }
6486 if ((cur->c2 != NULL) &&
6487 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 found = 1;
6491 }
6492 cur = cur->c2;
6493 }
6494 if (found)
6495 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6496 }
6497 NEXT;
6498 }
6499 return(ret);
6500 }
6501
6502 /**
6503 * xmlParseElementChildrenContentDecl:
6504 * @ctxt: an XML parser context
6505 * @inputchk: the input used for the current entity, needed for boundary checks
6506 *
6507 * parse the declaration for a Mixed Element content
6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6509 *
6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6511 *
6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6513 *
6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6515 *
6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6517 *
6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519 * TODO Parameter-entity replacement text must be properly nested
6520 * with parenthesized groups. That is to say, if either of the
6521 * opening or closing parentheses in a choice, seq, or Mixed
6522 * construct is contained in the replacement text for a parameter
6523 * entity, both must be contained in the same replacement text. For
6524 * interoperability, if a parameter-entity reference appears in a
6525 * choice, seq, or Mixed construct, its replacement text should not
6526 * be empty, and neither the first nor last non-blank character of
6527 * the replacement text should be a connector (| or ,).
6528 *
6529 * Returns the tree of xmlElementContentPtr describing the element
6530 * hierarchy.
6531 */
6532 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6533 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534 /* stub left for API/ABI compat */
6535 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6536 }
6537
6538 /**
6539 * xmlParseElementContentDecl:
6540 * @ctxt: an XML parser context
6541 * @name: the name of the element being defined.
6542 * @result: the Element Content pointer will be stored here if any
6543 *
6544 * parse the declaration for an Element content either Mixed or Children,
6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6546 *
6547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6548 *
6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6550 */
6551
6552 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6553 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6554 xmlElementContentPtr *result) {
6555
6556 xmlElementContentPtr tree = NULL;
6557 int inputid = ctxt->input->id;
6558 int res;
6559
6560 *result = NULL;
6561
6562 if (RAW != '(') {
6563 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6564 "xmlParseElementContentDecl : %s '(' expected\n", name);
6565 return(-1);
6566 }
6567 NEXT;
6568 GROW;
6569 if (ctxt->instate == XML_PARSER_EOF)
6570 return(-1);
6571 SKIP_BLANKS;
6572 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6574 res = XML_ELEMENT_TYPE_MIXED;
6575 } else {
6576 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6577 res = XML_ELEMENT_TYPE_ELEMENT;
6578 }
6579 SKIP_BLANKS;
6580 *result = tree;
6581 return(res);
6582 }
6583
6584 /**
6585 * xmlParseElementDecl:
6586 * @ctxt: an XML parser context
6587 *
6588 * parse an Element declaration.
6589 *
6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6591 *
6592 * [ VC: Unique Element Type Declaration ]
6593 * No element type may be declared more than once
6594 *
6595 * Returns the type of the element, or -1 in case of error
6596 */
6597 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6598 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6599 const xmlChar *name;
6600 int ret = -1;
6601 xmlElementContentPtr content = NULL;
6602
6603 /* GROW; done in the caller */
6604 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605 int inputid = ctxt->input->id;
6606
6607 SKIP(9);
6608 if (SKIP_BLANKS == 0) {
6609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610 "Space required after 'ELEMENT'\n");
6611 return(-1);
6612 }
6613 name = xmlParseName(ctxt);
6614 if (name == NULL) {
6615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616 "xmlParseElementDecl: no name for Element\n");
6617 return(-1);
6618 }
6619 if (SKIP_BLANKS == 0) {
6620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6621 "Space required after the element name\n");
6622 }
6623 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6624 SKIP(5);
6625 /*
6626 * Element must always be empty.
6627 */
6628 ret = XML_ELEMENT_TYPE_EMPTY;
6629 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6630 (NXT(2) == 'Y')) {
6631 SKIP(3);
6632 /*
6633 * Element is a generic container.
6634 */
6635 ret = XML_ELEMENT_TYPE_ANY;
6636 } else if (RAW == '(') {
6637 ret = xmlParseElementContentDecl(ctxt, name, &content);
6638 } else {
6639 /*
6640 * [ WFC: PEs in Internal Subset ] error handling.
6641 */
6642 if ((RAW == '%') && (ctxt->external == 0) &&
6643 (ctxt->inputNr == 1)) {
6644 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6645 "PEReference: forbidden within markup decl in internal subset\n");
6646 } else {
6647 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6649 }
6650 return(-1);
6651 }
6652
6653 SKIP_BLANKS;
6654
6655 if (RAW != '>') {
6656 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6657 if (content != NULL) {
6658 xmlFreeDocElementContent(ctxt->myDoc, content);
6659 }
6660 } else {
6661 if (inputid != ctxt->input->id) {
6662 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6663 "Element declaration doesn't start and stop in"
6664 " the same entity\n");
6665 }
6666
6667 NEXT;
6668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6669 (ctxt->sax->elementDecl != NULL)) {
6670 if (content != NULL)
6671 content->parent = NULL;
6672 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6673 content);
6674 if ((content != NULL) && (content->parent == NULL)) {
6675 /*
6676 * this is a trick: if xmlAddElementDecl is called,
6677 * instead of copying the full tree it is plugged directly
6678 * if called from the parser. Avoid duplicating the
6679 * interfaces or change the API/ABI
6680 */
6681 xmlFreeDocElementContent(ctxt->myDoc, content);
6682 }
6683 } else if (content != NULL) {
6684 xmlFreeDocElementContent(ctxt->myDoc, content);
6685 }
6686 }
6687 }
6688 return(ret);
6689 }
6690
6691 /**
6692 * xmlParseConditionalSections
6693 * @ctxt: an XML parser context
6694 *
6695 * [61] conditionalSect ::= includeSect | ignoreSect
6696 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6697 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6700 */
6701
6702 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6703 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6704 int *inputIds = NULL;
6705 size_t inputIdsSize = 0;
6706 size_t depth = 0;
6707
6708 while (ctxt->instate != XML_PARSER_EOF) {
6709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710 int id = ctxt->input->id;
6711
6712 SKIP(3);
6713 SKIP_BLANKS;
6714
6715 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716 SKIP(7);
6717 SKIP_BLANKS;
6718 if (RAW != '[') {
6719 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6720 xmlHaltParser(ctxt);
6721 goto error;
6722 }
6723 if (ctxt->input->id != id) {
6724 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6725 "All markup of the conditional section is"
6726 " not in the same entity\n");
6727 }
6728 NEXT;
6729
6730 if (inputIdsSize <= depth) {
6731 int *tmp;
6732
6733 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6734 tmp = (int *) xmlRealloc(inputIds,
6735 inputIdsSize * sizeof(int));
6736 if (tmp == NULL) {
6737 xmlErrMemory(ctxt, NULL);
6738 goto error;
6739 }
6740 inputIds = tmp;
6741 }
6742 inputIds[depth] = id;
6743 depth++;
6744 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6745 int state;
6746 xmlParserInputState instate;
6747 size_t ignoreDepth = 0;
6748
6749 SKIP(6);
6750 SKIP_BLANKS;
6751 if (RAW != '[') {
6752 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6753 xmlHaltParser(ctxt);
6754 goto error;
6755 }
6756 if (ctxt->input->id != id) {
6757 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6758 "All markup of the conditional section is"
6759 " not in the same entity\n");
6760 }
6761 NEXT;
6762
6763 /*
6764 * Parse up to the end of the conditional section but disable
6765 * SAX event generating DTD building in the meantime
6766 */
6767 state = ctxt->disableSAX;
6768 instate = ctxt->instate;
6769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6770 ctxt->instate = XML_PARSER_IGNORE;
6771
6772 while (RAW != 0) {
6773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6774 SKIP(3);
6775 ignoreDepth++;
6776 /* Check for integer overflow */
6777 if (ignoreDepth == 0) {
6778 xmlErrMemory(ctxt, NULL);
6779 goto error;
6780 }
6781 } else if ((RAW == ']') && (NXT(1) == ']') &&
6782 (NXT(2) == '>')) {
6783 if (ignoreDepth == 0)
6784 break;
6785 SKIP(3);
6786 ignoreDepth--;
6787 } else {
6788 NEXT;
6789 }
6790 }
6791
6792 ctxt->disableSAX = state;
6793 ctxt->instate = instate;
6794
6795 if (RAW == 0) {
6796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6797 goto error;
6798 }
6799 if (ctxt->input->id != id) {
6800 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801 "All markup of the conditional section is"
6802 " not in the same entity\n");
6803 }
6804 SKIP(3);
6805 } else {
6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6807 xmlHaltParser(ctxt);
6808 goto error;
6809 }
6810 } else if ((depth > 0) &&
6811 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6812 depth--;
6813 if (ctxt->input->id != inputIds[depth]) {
6814 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815 "All markup of the conditional section is not"
6816 " in the same entity\n");
6817 }
6818 SKIP(3);
6819 } else {
6820 int id = ctxt->input->id;
6821 unsigned long cons = CUR_CONSUMED;
6822
6823 xmlParseMarkupDecl(ctxt);
6824
6825 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6826 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6827 xmlHaltParser(ctxt);
6828 goto error;
6829 }
6830 }
6831
6832 if (depth == 0)
6833 break;
6834
6835 SKIP_BLANKS;
6836 GROW;
6837 }
6838
6839 error:
6840 xmlFree(inputIds);
6841 }
6842
6843 /**
6844 * xmlParseMarkupDecl:
6845 * @ctxt: an XML parser context
6846 *
6847 * parse Markup declarations
6848 *
6849 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850 * NotationDecl | PI | Comment
6851 *
6852 * [ VC: Proper Declaration/PE Nesting ]
6853 * Parameter-entity replacement text must be properly nested with
6854 * markup declarations. That is to say, if either the first character
6855 * or the last character of a markup declaration (markupdecl above) is
6856 * contained in the replacement text for a parameter-entity reference,
6857 * both must be contained in the same replacement text.
6858 *
6859 * [ WFC: PEs in Internal Subset ]
6860 * In the internal DTD subset, parameter-entity references can occur
6861 * only where markup declarations can occur, not within markup declarations.
6862 * (This does not apply to references that occur in external parameter
6863 * entities or to the external subset.)
6864 */
6865 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6866 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6867 GROW;
6868 if (CUR == '<') {
6869 if (NXT(1) == '!') {
6870 switch (NXT(2)) {
6871 case 'E':
6872 if (NXT(3) == 'L')
6873 xmlParseElementDecl(ctxt);
6874 else if (NXT(3) == 'N')
6875 xmlParseEntityDecl(ctxt);
6876 break;
6877 case 'A':
6878 xmlParseAttributeListDecl(ctxt);
6879 break;
6880 case 'N':
6881 xmlParseNotationDecl(ctxt);
6882 break;
6883 case '-':
6884 xmlParseComment(ctxt);
6885 break;
6886 default:
6887 /* there is an error but it will be detected later */
6888 break;
6889 }
6890 } else if (NXT(1) == '?') {
6891 xmlParsePI(ctxt);
6892 }
6893 }
6894
6895 /*
6896 * detect requirement to exit there and act accordingly
6897 * and avoid having instate overridden later on
6898 */
6899 if (ctxt->instate == XML_PARSER_EOF)
6900 return;
6901
6902 ctxt->instate = XML_PARSER_DTD;
6903 }
6904
6905 /**
6906 * xmlParseTextDecl:
6907 * @ctxt: an XML parser context
6908 *
6909 * parse an XML declaration header for external entities
6910 *
6911 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6912 */
6913
6914 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6915 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6916 xmlChar *version;
6917 const xmlChar *encoding;
6918 int oldstate;
6919
6920 /*
6921 * We know that '<?xml' is here.
6922 */
6923 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6924 SKIP(5);
6925 } else {
6926 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6927 return;
6928 }
6929
6930 /* Avoid expansion of parameter entities when skipping blanks. */
6931 oldstate = ctxt->instate;
6932 ctxt->instate = XML_PARSER_START;
6933
6934 if (SKIP_BLANKS == 0) {
6935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6936 "Space needed after '<?xml'\n");
6937 }
6938
6939 /*
6940 * We may have the VersionInfo here.
6941 */
6942 version = xmlParseVersionInfo(ctxt);
6943 if (version == NULL)
6944 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6945 else {
6946 if (SKIP_BLANKS == 0) {
6947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948 "Space needed here\n");
6949 }
6950 }
6951 ctxt->input->version = version;
6952
6953 /*
6954 * We must have the encoding declaration
6955 */
6956 encoding = xmlParseEncodingDecl(ctxt);
6957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6958 /*
6959 * The XML REC instructs us to stop parsing right here
6960 */
6961 ctxt->instate = oldstate;
6962 return;
6963 }
6964 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6965 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6966 "Missing encoding in text declaration\n");
6967 }
6968
6969 SKIP_BLANKS;
6970 if ((RAW == '?') && (NXT(1) == '>')) {
6971 SKIP(2);
6972 } else if (RAW == '>') {
6973 /* Deprecated old WD ... */
6974 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6975 NEXT;
6976 } else {
6977 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6978 MOVETO_ENDTAG(CUR_PTR);
6979 NEXT;
6980 }
6981
6982 ctxt->instate = oldstate;
6983 }
6984
6985 /**
6986 * xmlParseExternalSubset:
6987 * @ctxt: an XML parser context
6988 * @ExternalID: the external identifier
6989 * @SystemID: the system identifier (or URL)
6990 *
6991 * parse Markup declarations from an external subset
6992 *
6993 * [30] extSubset ::= textDecl? extSubsetDecl
6994 *
6995 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6996 */
6997 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6998 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6999 const xmlChar *SystemID) {
7000 xmlDetectSAX2(ctxt);
7001 GROW;
7002
7003 if ((ctxt->encoding == NULL) &&
7004 (ctxt->input->end - ctxt->input->cur >= 4)) {
7005 xmlChar start[4];
7006 xmlCharEncoding enc;
7007
7008 start[0] = RAW;
7009 start[1] = NXT(1);
7010 start[2] = NXT(2);
7011 start[3] = NXT(3);
7012 enc = xmlDetectCharEncoding(start, 4);
7013 if (enc != XML_CHAR_ENCODING_NONE)
7014 xmlSwitchEncoding(ctxt, enc);
7015 }
7016
7017 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7018 xmlParseTextDecl(ctxt);
7019 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7020 /*
7021 * The XML REC instructs us to stop parsing right here
7022 */
7023 xmlHaltParser(ctxt);
7024 return;
7025 }
7026 }
7027 if (ctxt->myDoc == NULL) {
7028 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7029 if (ctxt->myDoc == NULL) {
7030 xmlErrMemory(ctxt, "New Doc failed");
7031 return;
7032 }
7033 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7034 }
7035 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7036 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7037
7038 ctxt->instate = XML_PARSER_DTD;
7039 ctxt->external = 1;
7040 SKIP_BLANKS;
7041 while (((RAW == '<') && (NXT(1) == '?')) ||
7042 ((RAW == '<') && (NXT(1) == '!')) ||
7043 (RAW == '%')) {
7044 int id = ctxt->input->id;
7045 unsigned long cons = CUR_CONSUMED;
7046
7047 GROW;
7048 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049 xmlParseConditionalSections(ctxt);
7050 } else
7051 xmlParseMarkupDecl(ctxt);
7052 SKIP_BLANKS;
7053
7054 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056 break;
7057 }
7058 }
7059
7060 if (RAW != 0) {
7061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7062 }
7063
7064 }
7065
7066 /**
7067 * xmlParseReference:
7068 * @ctxt: an XML parser context
7069 *
7070 * parse and handle entity references in content, depending on the SAX
7071 * interface, this may end-up in a call to character() if this is a
7072 * CharRef, a predefined entity, if there is no reference() callback.
7073 * or if the parser was asked to switch to that mode.
7074 *
7075 * [67] Reference ::= EntityRef | CharRef
7076 */
7077 void
xmlParseReference(xmlParserCtxtPtr ctxt)7078 xmlParseReference(xmlParserCtxtPtr ctxt) {
7079 xmlEntityPtr ent;
7080 xmlChar *val;
7081 int was_checked;
7082 xmlNodePtr list = NULL;
7083 xmlParserErrors ret = XML_ERR_OK;
7084
7085
7086 if (RAW != '&')
7087 return;
7088
7089 /*
7090 * Simple case of a CharRef
7091 */
7092 if (NXT(1) == '#') {
7093 int i = 0;
7094 xmlChar out[16];
7095 int hex = NXT(2);
7096 int value = xmlParseCharRef(ctxt);
7097
7098 if (value == 0)
7099 return;
7100 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7101 /*
7102 * So we are using non-UTF-8 buffers
7103 * Check that the char fit on 8bits, if not
7104 * generate a CharRef.
7105 */
7106 if (value <= 0xFF) {
7107 out[0] = value;
7108 out[1] = 0;
7109 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7110 (!ctxt->disableSAX))
7111 ctxt->sax->characters(ctxt->userData, out, 1);
7112 } else {
7113 if ((hex == 'x') || (hex == 'X'))
7114 snprintf((char *)out, sizeof(out), "#x%X", value);
7115 else
7116 snprintf((char *)out, sizeof(out), "#%d", value);
7117 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7118 (!ctxt->disableSAX))
7119 ctxt->sax->reference(ctxt->userData, out);
7120 }
7121 } else {
7122 /*
7123 * Just encode the value in UTF-8
7124 */
7125 COPY_BUF(0 ,out, i, value);
7126 out[i] = 0;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7128 (!ctxt->disableSAX))
7129 ctxt->sax->characters(ctxt->userData, out, i);
7130 }
7131 return;
7132 }
7133
7134 /*
7135 * We are seeing an entity reference
7136 */
7137 ent = xmlParseEntityRef(ctxt);
7138 if (ent == NULL) return;
7139 if (!ctxt->wellFormed)
7140 return;
7141 was_checked = ent->checked;
7142
7143 /* special case of predefined entities */
7144 if ((ent->name == NULL) ||
7145 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7146 val = ent->content;
7147 if (val == NULL) return;
7148 /*
7149 * inline the entity.
7150 */
7151 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 (!ctxt->disableSAX))
7153 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7154 return;
7155 }
7156
7157 /*
7158 * The first reference to the entity trigger a parsing phase
7159 * where the ent->children is filled with the result from
7160 * the parsing.
7161 * Note: external parsed entities will not be loaded, it is not
7162 * required for a non-validating parser, unless the parsing option
7163 * of validating, or substituting entities were given. Doing so is
7164 * far more secure as the parser will only process data coming from
7165 * the document entity by default.
7166 */
7167 if (((ent->checked == 0) ||
7168 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7169 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7170 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7171 unsigned long oldnbent = ctxt->nbentities, diff;
7172
7173 /*
7174 * This is a bit hackish but this seems the best
7175 * way to make sure both SAX and DOM entity support
7176 * behaves okay.
7177 */
7178 void *user_data;
7179 if (ctxt->userData == ctxt)
7180 user_data = NULL;
7181 else
7182 user_data = ctxt->userData;
7183
7184 /*
7185 * Check that this entity is well formed
7186 * 4.3.2: An internal general parsed entity is well-formed
7187 * if its replacement text matches the production labeled
7188 * content.
7189 */
7190 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7191 ctxt->depth++;
7192 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7193 user_data, &list);
7194 ctxt->depth--;
7195
7196 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7197 ctxt->depth++;
7198 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7199 user_data, ctxt->depth, ent->URI,
7200 ent->ExternalID, &list);
7201 ctxt->depth--;
7202 } else {
7203 ret = XML_ERR_ENTITY_PE_INTERNAL;
7204 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7205 "invalid entity type found\n", NULL);
7206 }
7207
7208 /*
7209 * Store the number of entities needing parsing for this entity
7210 * content and do checkings
7211 */
7212 diff = ctxt->nbentities - oldnbent + 1;
7213 if (diff > INT_MAX / 2)
7214 diff = INT_MAX / 2;
7215 ent->checked = diff * 2;
7216 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7217 ent->checked |= 1;
7218 if (ret == XML_ERR_ENTITY_LOOP) {
7219 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7220 xmlHaltParser(ctxt);
7221 xmlFreeNodeList(list);
7222 return;
7223 }
7224 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7225 xmlFreeNodeList(list);
7226 return;
7227 }
7228
7229 if ((ret == XML_ERR_OK) && (list != NULL)) {
7230 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7231 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7232 (ent->children == NULL)) {
7233 ent->children = list;
7234 /*
7235 * Prune it directly in the generated document
7236 * except for single text nodes.
7237 */
7238 if ((ctxt->replaceEntities == 0) ||
7239 (ctxt->parseMode == XML_PARSE_READER) ||
7240 ((list->type == XML_TEXT_NODE) &&
7241 (list->next == NULL))) {
7242 ent->owner = 1;
7243 while (list != NULL) {
7244 list->parent = (xmlNodePtr) ent;
7245 if (list->doc != ent->doc)
7246 xmlSetTreeDoc(list, ent->doc);
7247 if (list->next == NULL)
7248 ent->last = list;
7249 list = list->next;
7250 }
7251 list = NULL;
7252 } else {
7253 ent->owner = 0;
7254 while (list != NULL) {
7255 list->parent = (xmlNodePtr) ctxt->node;
7256 list->doc = ctxt->myDoc;
7257 if (list->next == NULL)
7258 ent->last = list;
7259 list = list->next;
7260 }
7261 list = ent->children;
7262 #ifdef LIBXML_LEGACY_ENABLED
7263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7264 xmlAddEntityReference(ent, list, NULL);
7265 #endif /* LIBXML_LEGACY_ENABLED */
7266 }
7267 } else {
7268 xmlFreeNodeList(list);
7269 list = NULL;
7270 }
7271 } else if ((ret != XML_ERR_OK) &&
7272 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7273 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 "Entity '%s' failed to parse\n", ent->name);
7275 if (ent->content != NULL)
7276 ent->content[0] = 0;
7277 xmlParserEntityCheck(ctxt, 0, ent, 0);
7278 } else if (list != NULL) {
7279 xmlFreeNodeList(list);
7280 list = NULL;
7281 }
7282 if (ent->checked == 0)
7283 ent->checked = 2;
7284
7285 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7286 was_checked = 0;
7287 } else if (ent->checked != 1) {
7288 ctxt->nbentities += ent->checked / 2;
7289 }
7290
7291 /*
7292 * Now that the entity content has been gathered
7293 * provide it to the application, this can take different forms based
7294 * on the parsing modes.
7295 */
7296 if (ent->children == NULL) {
7297 /*
7298 * Probably running in SAX mode and the callbacks don't
7299 * build the entity content. So unless we already went
7300 * though parsing for first checking go though the entity
7301 * content to generate callbacks associated to the entity
7302 */
7303 if (was_checked != 0) {
7304 void *user_data;
7305 /*
7306 * This is a bit hackish but this seems the best
7307 * way to make sure both SAX and DOM entity support
7308 * behaves okay.
7309 */
7310 if (ctxt->userData == ctxt)
7311 user_data = NULL;
7312 else
7313 user_data = ctxt->userData;
7314
7315 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7316 ctxt->depth++;
7317 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7318 ent->content, user_data, NULL);
7319 ctxt->depth--;
7320 } else if (ent->etype ==
7321 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7322 ctxt->depth++;
7323 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7324 ctxt->sax, user_data, ctxt->depth,
7325 ent->URI, ent->ExternalID, NULL);
7326 ctxt->depth--;
7327 } else {
7328 ret = XML_ERR_ENTITY_PE_INTERNAL;
7329 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7330 "invalid entity type found\n", NULL);
7331 }
7332 if (ret == XML_ERR_ENTITY_LOOP) {
7333 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7334 return;
7335 }
7336 }
7337 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7338 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7339 /*
7340 * Entity reference callback comes second, it's somewhat
7341 * superfluous but a compatibility to historical behaviour
7342 */
7343 ctxt->sax->reference(ctxt->userData, ent->name);
7344 }
7345 return;
7346 }
7347
7348 /*
7349 * If we didn't get any children for the entity being built
7350 */
7351 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7352 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7353 /*
7354 * Create a node.
7355 */
7356 ctxt->sax->reference(ctxt->userData, ent->name);
7357 return;
7358 }
7359
7360 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7361 /*
7362 * There is a problem on the handling of _private for entities
7363 * (bug 155816): Should we copy the content of the field from
7364 * the entity (possibly overwriting some value set by the user
7365 * when a copy is created), should we leave it alone, or should
7366 * we try to take care of different situations? The problem
7367 * is exacerbated by the usage of this field by the xmlReader.
7368 * To fix this bug, we look at _private on the created node
7369 * and, if it's NULL, we copy in whatever was in the entity.
7370 * If it's not NULL we leave it alone. This is somewhat of a
7371 * hack - maybe we should have further tests to determine
7372 * what to do.
7373 */
7374 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7375 /*
7376 * Seems we are generating the DOM content, do
7377 * a simple tree copy for all references except the first
7378 * In the first occurrence list contains the replacement.
7379 */
7380 if (((list == NULL) && (ent->owner == 0)) ||
7381 (ctxt->parseMode == XML_PARSE_READER)) {
7382 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7383
7384 /*
7385 * We are copying here, make sure there is no abuse
7386 */
7387 ctxt->sizeentcopy += ent->length + 5;
7388 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7389 return;
7390
7391 /*
7392 * when operating on a reader, the entities definitions
7393 * are always owning the entities subtree.
7394 if (ctxt->parseMode == XML_PARSE_READER)
7395 ent->owner = 1;
7396 */
7397
7398 cur = ent->children;
7399 while (cur != NULL) {
7400 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7401 if (nw != NULL) {
7402 if (nw->_private == NULL)
7403 nw->_private = cur->_private;
7404 if (firstChild == NULL){
7405 firstChild = nw;
7406 }
7407 nw = xmlAddChild(ctxt->node, nw);
7408 }
7409 if (cur == ent->last) {
7410 /*
7411 * needed to detect some strange empty
7412 * node cases in the reader tests
7413 */
7414 if ((ctxt->parseMode == XML_PARSE_READER) &&
7415 (nw != NULL) &&
7416 (nw->type == XML_ELEMENT_NODE) &&
7417 (nw->children == NULL))
7418 nw->extra = 1;
7419
7420 break;
7421 }
7422 cur = cur->next;
7423 }
7424 #ifdef LIBXML_LEGACY_ENABLED
7425 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7426 xmlAddEntityReference(ent, firstChild, nw);
7427 #endif /* LIBXML_LEGACY_ENABLED */
7428 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7429 xmlNodePtr nw = NULL, cur, next, last,
7430 firstChild = NULL;
7431
7432 /*
7433 * We are copying here, make sure there is no abuse
7434 */
7435 ctxt->sizeentcopy += ent->length + 5;
7436 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7437 return;
7438
7439 /*
7440 * Copy the entity child list and make it the new
7441 * entity child list. The goal is to make sure any
7442 * ID or REF referenced will be the one from the
7443 * document content and not the entity copy.
7444 */
7445 cur = ent->children;
7446 ent->children = NULL;
7447 last = ent->last;
7448 ent->last = NULL;
7449 while (cur != NULL) {
7450 next = cur->next;
7451 cur->next = NULL;
7452 cur->parent = NULL;
7453 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7454 if (nw != NULL) {
7455 if (nw->_private == NULL)
7456 nw->_private = cur->_private;
7457 if (firstChild == NULL){
7458 firstChild = cur;
7459 }
7460 xmlAddChild((xmlNodePtr) ent, nw);
7461 xmlAddChild(ctxt->node, cur);
7462 }
7463 if (cur == last)
7464 break;
7465 cur = next;
7466 }
7467 if (ent->owner == 0)
7468 ent->owner = 1;
7469 #ifdef LIBXML_LEGACY_ENABLED
7470 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7471 xmlAddEntityReference(ent, firstChild, nw);
7472 #endif /* LIBXML_LEGACY_ENABLED */
7473 } else {
7474 const xmlChar *nbktext;
7475
7476 /*
7477 * the name change is to avoid coalescing of the
7478 * node with a possible previous text one which
7479 * would make ent->children a dangling pointer
7480 */
7481 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7482 -1);
7483 if (ent->children->type == XML_TEXT_NODE)
7484 ent->children->name = nbktext;
7485 if ((ent->last != ent->children) &&
7486 (ent->last->type == XML_TEXT_NODE))
7487 ent->last->name = nbktext;
7488 xmlAddChildList(ctxt->node, ent->children);
7489 }
7490
7491 /*
7492 * This is to avoid a nasty side effect, see
7493 * characters() in SAX.c
7494 */
7495 ctxt->nodemem = 0;
7496 ctxt->nodelen = 0;
7497 return;
7498 }
7499 }
7500 }
7501
7502 /**
7503 * xmlParseEntityRef:
7504 * @ctxt: an XML parser context
7505 *
7506 * parse ENTITY references declarations
7507 *
7508 * [68] EntityRef ::= '&' Name ';'
7509 *
7510 * [ WFC: Entity Declared ]
7511 * In a document without any DTD, a document with only an internal DTD
7512 * subset which contains no parameter entity references, or a document
7513 * with "standalone='yes'", the Name given in the entity reference
7514 * must match that in an entity declaration, except that well-formed
7515 * documents need not declare any of the following entities: amp, lt,
7516 * gt, apos, quot. The declaration of a parameter entity must precede
7517 * any reference to it. Similarly, the declaration of a general entity
7518 * must precede any reference to it which appears in a default value in an
7519 * attribute-list declaration. Note that if entities are declared in the
7520 * external subset or in external parameter entities, a non-validating
7521 * processor is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be declared is a
7523 * well-formedness constraint only if standalone='yes'.
7524 *
7525 * [ WFC: Parsed Entity ]
7526 * An entity reference must not contain the name of an unparsed entity
7527 *
7528 * Returns the xmlEntityPtr if found, or NULL otherwise.
7529 */
7530 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7531 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7532 const xmlChar *name;
7533 xmlEntityPtr ent = NULL;
7534
7535 GROW;
7536 if (ctxt->instate == XML_PARSER_EOF)
7537 return(NULL);
7538
7539 if (RAW != '&')
7540 return(NULL);
7541 NEXT;
7542 name = xmlParseName(ctxt);
7543 if (name == NULL) {
7544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7545 "xmlParseEntityRef: no name\n");
7546 return(NULL);
7547 }
7548 if (RAW != ';') {
7549 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7550 return(NULL);
7551 }
7552 NEXT;
7553
7554 /*
7555 * Predefined entities override any extra definition
7556 */
7557 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558 ent = xmlGetPredefinedEntity(name);
7559 if (ent != NULL)
7560 return(ent);
7561 }
7562
7563 /*
7564 * Increase the number of entity references parsed
7565 */
7566 ctxt->nbentities++;
7567
7568 /*
7569 * Ask first SAX for entity resolution, otherwise try the
7570 * entities which may have stored in the parser context.
7571 */
7572 if (ctxt->sax != NULL) {
7573 if (ctxt->sax->getEntity != NULL)
7574 ent = ctxt->sax->getEntity(ctxt->userData, name);
7575 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7576 (ctxt->options & XML_PARSE_OLDSAX))
7577 ent = xmlGetPredefinedEntity(name);
7578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7579 (ctxt->userData==ctxt)) {
7580 ent = xmlSAX2GetEntity(ctxt, name);
7581 }
7582 }
7583 if (ctxt->instate == XML_PARSER_EOF)
7584 return(NULL);
7585 /*
7586 * [ WFC: Entity Declared ]
7587 * In a document without any DTD, a document with only an
7588 * internal DTD subset which contains no parameter entity
7589 * references, or a document with "standalone='yes'", the
7590 * Name given in the entity reference must match that in an
7591 * entity declaration, except that well-formed documents
7592 * need not declare any of the following entities: amp, lt,
7593 * gt, apos, quot.
7594 * The declaration of a parameter entity must precede any
7595 * reference to it.
7596 * Similarly, the declaration of a general entity must
7597 * precede any reference to it which appears in a default
7598 * value in an attribute-list declaration. Note that if
7599 * entities are declared in the external subset or in
7600 * external parameter entities, a non-validating processor
7601 * is not obligated to read and process their declarations;
7602 * for such documents, the rule that an entity must be
7603 * declared is a well-formedness constraint only if
7604 * standalone='yes'.
7605 */
7606 if (ent == NULL) {
7607 if ((ctxt->standalone == 1) ||
7608 ((ctxt->hasExternalSubset == 0) &&
7609 (ctxt->hasPErefs == 0))) {
7610 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7611 "Entity '%s' not defined\n", name);
7612 } else {
7613 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7614 "Entity '%s' not defined\n", name);
7615 if ((ctxt->inSubset == 0) &&
7616 (ctxt->sax != NULL) &&
7617 (ctxt->sax->reference != NULL)) {
7618 ctxt->sax->reference(ctxt->userData, name);
7619 }
7620 }
7621 xmlParserEntityCheck(ctxt, 0, ent, 0);
7622 ctxt->valid = 0;
7623 }
7624
7625 /*
7626 * [ WFC: Parsed Entity ]
7627 * An entity reference must not contain the name of an
7628 * unparsed entity
7629 */
7630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7631 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7632 "Entity reference to unparsed entity %s\n", name);
7633 }
7634
7635 /*
7636 * [ WFC: No External Entity References ]
7637 * Attribute values cannot contain direct or indirect
7638 * entity references to external entities.
7639 */
7640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7642 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7643 "Attribute references external entity '%s'\n", name);
7644 }
7645 /*
7646 * [ WFC: No < in Attribute Values ]
7647 * The replacement text of any entity referred to directly or
7648 * indirectly in an attribute value (other than "<") must
7649 * not contain a <.
7650 */
7651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652 (ent != NULL) &&
7653 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7654 if (((ent->checked & 1) || (ent->checked == 0)) &&
7655 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7656 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7657 "'<' in entity '%s' is not allowed in attributes values\n", name);
7658 }
7659 }
7660
7661 /*
7662 * Internal check, no parameter entities here ...
7663 */
7664 else {
7665 switch (ent->etype) {
7666 case XML_INTERNAL_PARAMETER_ENTITY:
7667 case XML_EXTERNAL_PARAMETER_ENTITY:
7668 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7669 "Attempt to reference the parameter entity '%s'\n",
7670 name);
7671 break;
7672 default:
7673 break;
7674 }
7675 }
7676
7677 /*
7678 * [ WFC: No Recursion ]
7679 * A parsed entity must not contain a recursive reference
7680 * to itself, either directly or indirectly.
7681 * Done somewhere else
7682 */
7683 return(ent);
7684 }
7685
7686 /**
7687 * xmlParseStringEntityRef:
7688 * @ctxt: an XML parser context
7689 * @str: a pointer to an index in the string
7690 *
7691 * parse ENTITY references declarations, but this version parses it from
7692 * a string value.
7693 *
7694 * [68] EntityRef ::= '&' Name ';'
7695 *
7696 * [ WFC: Entity Declared ]
7697 * In a document without any DTD, a document with only an internal DTD
7698 * subset which contains no parameter entity references, or a document
7699 * with "standalone='yes'", the Name given in the entity reference
7700 * must match that in an entity declaration, except that well-formed
7701 * documents need not declare any of the following entities: amp, lt,
7702 * gt, apos, quot. The declaration of a parameter entity must precede
7703 * any reference to it. Similarly, the declaration of a general entity
7704 * must precede any reference to it which appears in a default value in an
7705 * attribute-list declaration. Note that if entities are declared in the
7706 * external subset or in external parameter entities, a non-validating
7707 * processor is not obligated to read and process their declarations;
7708 * for such documents, the rule that an entity must be declared is a
7709 * well-formedness constraint only if standalone='yes'.
7710 *
7711 * [ WFC: Parsed Entity ]
7712 * An entity reference must not contain the name of an unparsed entity
7713 *
7714 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715 * is updated to the current location in the string.
7716 */
7717 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7718 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7719 xmlChar *name;
7720 const xmlChar *ptr;
7721 xmlChar cur;
7722 xmlEntityPtr ent = NULL;
7723
7724 if ((str == NULL) || (*str == NULL))
7725 return(NULL);
7726 ptr = *str;
7727 cur = *ptr;
7728 if (cur != '&')
7729 return(NULL);
7730
7731 ptr++;
7732 name = xmlParseStringName(ctxt, &ptr);
7733 if (name == NULL) {
7734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7735 "xmlParseStringEntityRef: no name\n");
7736 *str = ptr;
7737 return(NULL);
7738 }
7739 if (*ptr != ';') {
7740 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7741 xmlFree(name);
7742 *str = ptr;
7743 return(NULL);
7744 }
7745 ptr++;
7746
7747
7748 /*
7749 * Predefined entities override any extra definition
7750 */
7751 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7752 ent = xmlGetPredefinedEntity(name);
7753 if (ent != NULL) {
7754 xmlFree(name);
7755 *str = ptr;
7756 return(ent);
7757 }
7758 }
7759
7760 /*
7761 * Increase the number of entity references parsed
7762 */
7763 ctxt->nbentities++;
7764
7765 /*
7766 * Ask first SAX for entity resolution, otherwise try the
7767 * entities which may have stored in the parser context.
7768 */
7769 if (ctxt->sax != NULL) {
7770 if (ctxt->sax->getEntity != NULL)
7771 ent = ctxt->sax->getEntity(ctxt->userData, name);
7772 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7773 ent = xmlGetPredefinedEntity(name);
7774 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7775 ent = xmlSAX2GetEntity(ctxt, name);
7776 }
7777 }
7778 if (ctxt->instate == XML_PARSER_EOF) {
7779 xmlFree(name);
7780 return(NULL);
7781 }
7782
7783 /*
7784 * [ WFC: Entity Declared ]
7785 * In a document without any DTD, a document with only an
7786 * internal DTD subset which contains no parameter entity
7787 * references, or a document with "standalone='yes'", the
7788 * Name given in the entity reference must match that in an
7789 * entity declaration, except that well-formed documents
7790 * need not declare any of the following entities: amp, lt,
7791 * gt, apos, quot.
7792 * The declaration of a parameter entity must precede any
7793 * reference to it.
7794 * Similarly, the declaration of a general entity must
7795 * precede any reference to it which appears in a default
7796 * value in an attribute-list declaration. Note that if
7797 * entities are declared in the external subset or in
7798 * external parameter entities, a non-validating processor
7799 * is not obligated to read and process their declarations;
7800 * for such documents, the rule that an entity must be
7801 * declared is a well-formedness constraint only if
7802 * standalone='yes'.
7803 */
7804 if (ent == NULL) {
7805 if ((ctxt->standalone == 1) ||
7806 ((ctxt->hasExternalSubset == 0) &&
7807 (ctxt->hasPErefs == 0))) {
7808 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7809 "Entity '%s' not defined\n", name);
7810 } else {
7811 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7812 "Entity '%s' not defined\n",
7813 name);
7814 }
7815 xmlParserEntityCheck(ctxt, 0, ent, 0);
7816 /* TODO ? check regressions ctxt->valid = 0; */
7817 }
7818
7819 /*
7820 * [ WFC: Parsed Entity ]
7821 * An entity reference must not contain the name of an
7822 * unparsed entity
7823 */
7824 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7825 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7826 "Entity reference to unparsed entity %s\n", name);
7827 }
7828
7829 /*
7830 * [ WFC: No External Entity References ]
7831 * Attribute values cannot contain direct or indirect
7832 * entity references to external entities.
7833 */
7834 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7835 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7836 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7837 "Attribute references external entity '%s'\n", name);
7838 }
7839 /*
7840 * [ WFC: No < in Attribute Values ]
7841 * The replacement text of any entity referred to directly or
7842 * indirectly in an attribute value (other than "<") must
7843 * not contain a <.
7844 */
7845 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7846 (ent != NULL) && (ent->content != NULL) &&
7847 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7848 (xmlStrchr(ent->content, '<'))) {
7849 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7850 "'<' in entity '%s' is not allowed in attributes values\n",
7851 name);
7852 }
7853
7854 /*
7855 * Internal check, no parameter entities here ...
7856 */
7857 else {
7858 switch (ent->etype) {
7859 case XML_INTERNAL_PARAMETER_ENTITY:
7860 case XML_EXTERNAL_PARAMETER_ENTITY:
7861 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7862 "Attempt to reference the parameter entity '%s'\n",
7863 name);
7864 break;
7865 default:
7866 break;
7867 }
7868 }
7869
7870 /*
7871 * [ WFC: No Recursion ]
7872 * A parsed entity must not contain a recursive reference
7873 * to itself, either directly or indirectly.
7874 * Done somewhere else
7875 */
7876
7877 xmlFree(name);
7878 *str = ptr;
7879 return(ent);
7880 }
7881
7882 /**
7883 * xmlParsePEReference:
7884 * @ctxt: an XML parser context
7885 *
7886 * parse PEReference declarations
7887 * The entity content is handled directly by pushing it's content as
7888 * a new input stream.
7889 *
7890 * [69] PEReference ::= '%' Name ';'
7891 *
7892 * [ WFC: No Recursion ]
7893 * A parsed entity must not contain a recursive
7894 * reference to itself, either directly or indirectly.
7895 *
7896 * [ WFC: Entity Declared ]
7897 * In a document without any DTD, a document with only an internal DTD
7898 * subset which contains no parameter entity references, or a document
7899 * with "standalone='yes'", ... ... The declaration of a parameter
7900 * entity must precede any reference to it...
7901 *
7902 * [ VC: Entity Declared ]
7903 * In a document with an external subset or external parameter entities
7904 * with "standalone='no'", ... ... The declaration of a parameter entity
7905 * must precede any reference to it...
7906 *
7907 * [ WFC: In DTD ]
7908 * Parameter-entity references may only appear in the DTD.
7909 * NOTE: misleading but this is handled.
7910 */
7911 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7912 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7913 {
7914 const xmlChar *name;
7915 xmlEntityPtr entity = NULL;
7916 xmlParserInputPtr input;
7917
7918 if (RAW != '%')
7919 return;
7920 NEXT;
7921 name = xmlParseName(ctxt);
7922 if (name == NULL) {
7923 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7924 return;
7925 }
7926 if (xmlParserDebugEntities)
7927 xmlGenericError(xmlGenericErrorContext,
7928 "PEReference: %s\n", name);
7929 if (RAW != ';') {
7930 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7931 return;
7932 }
7933
7934 NEXT;
7935
7936 /*
7937 * Increase the number of entity references parsed
7938 */
7939 ctxt->nbentities++;
7940
7941 /*
7942 * Request the entity from SAX
7943 */
7944 if ((ctxt->sax != NULL) &&
7945 (ctxt->sax->getParameterEntity != NULL))
7946 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7947 if (ctxt->instate == XML_PARSER_EOF)
7948 return;
7949 if (entity == NULL) {
7950 /*
7951 * [ WFC: Entity Declared ]
7952 * In a document without any DTD, a document with only an
7953 * internal DTD subset which contains no parameter entity
7954 * references, or a document with "standalone='yes'", ...
7955 * ... The declaration of a parameter entity must precede
7956 * any reference to it...
7957 */
7958 if ((ctxt->standalone == 1) ||
7959 ((ctxt->hasExternalSubset == 0) &&
7960 (ctxt->hasPErefs == 0))) {
7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7962 "PEReference: %%%s; not found\n",
7963 name);
7964 } else {
7965 /*
7966 * [ VC: Entity Declared ]
7967 * In a document with an external subset or external
7968 * parameter entities with "standalone='no'", ...
7969 * ... The declaration of a parameter entity must
7970 * precede any reference to it...
7971 */
7972 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7973 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7974 "PEReference: %%%s; not found\n",
7975 name, NULL);
7976 } else
7977 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7978 "PEReference: %%%s; not found\n",
7979 name, NULL);
7980 ctxt->valid = 0;
7981 }
7982 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7983 } else {
7984 /*
7985 * Internal checking in case the entity quest barfed
7986 */
7987 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7988 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7989 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7990 "Internal: %%%s; is not a parameter entity\n",
7991 name, NULL);
7992 } else {
7993 xmlChar start[4];
7994 xmlCharEncoding enc;
7995
7996 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7997 return;
7998
7999 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8000 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8001 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8002 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8003 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8004 (ctxt->replaceEntities == 0) &&
8005 (ctxt->validate == 0))
8006 return;
8007
8008 input = xmlNewEntityInputStream(ctxt, entity);
8009 if (xmlPushInput(ctxt, input) < 0) {
8010 xmlFreeInputStream(input);
8011 return;
8012 }
8013
8014 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8015 /*
8016 * Get the 4 first bytes and decode the charset
8017 * if enc != XML_CHAR_ENCODING_NONE
8018 * plug some encoding conversion routines.
8019 * Note that, since we may have some non-UTF8
8020 * encoding (like UTF16, bug 135229), the 'length'
8021 * is not known, but we can calculate based upon
8022 * the amount of data in the buffer.
8023 */
8024 GROW
8025 if (ctxt->instate == XML_PARSER_EOF)
8026 return;
8027 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8028 start[0] = RAW;
8029 start[1] = NXT(1);
8030 start[2] = NXT(2);
8031 start[3] = NXT(3);
8032 enc = xmlDetectCharEncoding(start, 4);
8033 if (enc != XML_CHAR_ENCODING_NONE) {
8034 xmlSwitchEncoding(ctxt, enc);
8035 }
8036 }
8037
8038 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8039 (IS_BLANK_CH(NXT(5)))) {
8040 xmlParseTextDecl(ctxt);
8041 }
8042 }
8043 }
8044 }
8045 ctxt->hasPErefs = 1;
8046 }
8047
8048 /**
8049 * xmlLoadEntityContent:
8050 * @ctxt: an XML parser context
8051 * @entity: an unloaded system entity
8052 *
8053 * Load the original content of the given system entity from the
8054 * ExternalID/SystemID given. This is to be used for Included in Literal
8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056 *
8057 * Returns 0 in case of success and -1 in case of failure
8058 */
8059 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8060 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061 xmlParserInputPtr input;
8062 xmlBufferPtr buf;
8063 int l, c;
8064 int count = 0;
8065
8066 if ((ctxt == NULL) || (entity == NULL) ||
8067 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069 (entity->content != NULL)) {
8070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071 "xmlLoadEntityContent parameter error");
8072 return(-1);
8073 }
8074
8075 if (xmlParserDebugEntities)
8076 xmlGenericError(xmlGenericErrorContext,
8077 "Reading %s entity content input\n", entity->name);
8078
8079 buf = xmlBufferCreate();
8080 if (buf == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent parameter error");
8083 return(-1);
8084 }
8085 xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8086
8087 input = xmlNewEntityInputStream(ctxt, entity);
8088 if (input == NULL) {
8089 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8090 "xmlLoadEntityContent input error");
8091 xmlBufferFree(buf);
8092 return(-1);
8093 }
8094
8095 /*
8096 * Push the entity as the current input, read char by char
8097 * saving to the buffer until the end of the entity or an error
8098 */
8099 if (xmlPushInput(ctxt, input) < 0) {
8100 xmlBufferFree(buf);
8101 xmlFreeInputStream(input);
8102 return(-1);
8103 }
8104
8105 GROW;
8106 c = CUR_CHAR(l);
8107 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8108 (IS_CHAR(c))) {
8109 xmlBufferAdd(buf, ctxt->input->cur, l);
8110 if (count++ > XML_PARSER_CHUNK_SIZE) {
8111 count = 0;
8112 GROW;
8113 if (ctxt->instate == XML_PARSER_EOF) {
8114 xmlBufferFree(buf);
8115 return(-1);
8116 }
8117 }
8118 NEXTL(l);
8119 c = CUR_CHAR(l);
8120 if (c == 0) {
8121 count = 0;
8122 GROW;
8123 if (ctxt->instate == XML_PARSER_EOF) {
8124 xmlBufferFree(buf);
8125 return(-1);
8126 }
8127 c = CUR_CHAR(l);
8128 }
8129 }
8130
8131 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8132 xmlPopInput(ctxt);
8133 } else if (!IS_CHAR(c)) {
8134 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8135 "xmlLoadEntityContent: invalid char value %d\n",
8136 c);
8137 xmlBufferFree(buf);
8138 return(-1);
8139 }
8140 entity->content = buf->content;
8141 buf->content = NULL;
8142 xmlBufferFree(buf);
8143
8144 return(0);
8145 }
8146
8147 /**
8148 * xmlParseStringPEReference:
8149 * @ctxt: an XML parser context
8150 * @str: a pointer to an index in the string
8151 *
8152 * parse PEReference declarations
8153 *
8154 * [69] PEReference ::= '%' Name ';'
8155 *
8156 * [ WFC: No Recursion ]
8157 * A parsed entity must not contain a recursive
8158 * reference to itself, either directly or indirectly.
8159 *
8160 * [ WFC: Entity Declared ]
8161 * In a document without any DTD, a document with only an internal DTD
8162 * subset which contains no parameter entity references, or a document
8163 * with "standalone='yes'", ... ... The declaration of a parameter
8164 * entity must precede any reference to it...
8165 *
8166 * [ VC: Entity Declared ]
8167 * In a document with an external subset or external parameter entities
8168 * with "standalone='no'", ... ... The declaration of a parameter entity
8169 * must precede any reference to it...
8170 *
8171 * [ WFC: In DTD ]
8172 * Parameter-entity references may only appear in the DTD.
8173 * NOTE: misleading but this is handled.
8174 *
8175 * Returns the string of the entity content.
8176 * str is updated to the current value of the index
8177 */
8178 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8179 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8180 const xmlChar *ptr;
8181 xmlChar cur;
8182 xmlChar *name;
8183 xmlEntityPtr entity = NULL;
8184
8185 if ((str == NULL) || (*str == NULL)) return(NULL);
8186 ptr = *str;
8187 cur = *ptr;
8188 if (cur != '%')
8189 return(NULL);
8190 ptr++;
8191 name = xmlParseStringName(ctxt, &ptr);
8192 if (name == NULL) {
8193 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194 "xmlParseStringPEReference: no name\n");
8195 *str = ptr;
8196 return(NULL);
8197 }
8198 cur = *ptr;
8199 if (cur != ';') {
8200 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8201 xmlFree(name);
8202 *str = ptr;
8203 return(NULL);
8204 }
8205 ptr++;
8206
8207 /*
8208 * Increase the number of entity references parsed
8209 */
8210 ctxt->nbentities++;
8211
8212 /*
8213 * Request the entity from SAX
8214 */
8215 if ((ctxt->sax != NULL) &&
8216 (ctxt->sax->getParameterEntity != NULL))
8217 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8218 if (ctxt->instate == XML_PARSER_EOF) {
8219 xmlFree(name);
8220 *str = ptr;
8221 return(NULL);
8222 }
8223 if (entity == NULL) {
8224 /*
8225 * [ WFC: Entity Declared ]
8226 * In a document without any DTD, a document with only an
8227 * internal DTD subset which contains no parameter entity
8228 * references, or a document with "standalone='yes'", ...
8229 * ... The declaration of a parameter entity must precede
8230 * any reference to it...
8231 */
8232 if ((ctxt->standalone == 1) ||
8233 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8235 "PEReference: %%%s; not found\n", name);
8236 } else {
8237 /*
8238 * [ VC: Entity Declared ]
8239 * In a document with an external subset or external
8240 * parameter entities with "standalone='no'", ...
8241 * ... The declaration of a parameter entity must
8242 * precede any reference to it...
8243 */
8244 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8245 "PEReference: %%%s; not found\n",
8246 name, NULL);
8247 ctxt->valid = 0;
8248 }
8249 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8250 } else {
8251 /*
8252 * Internal checking in case the entity quest barfed
8253 */
8254 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8255 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8256 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8257 "%%%s; is not a parameter entity\n",
8258 name, NULL);
8259 }
8260 }
8261 ctxt->hasPErefs = 1;
8262 xmlFree(name);
8263 *str = ptr;
8264 return(entity);
8265 }
8266
8267 /**
8268 * xmlParseDocTypeDecl:
8269 * @ctxt: an XML parser context
8270 *
8271 * parse a DOCTYPE declaration
8272 *
8273 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8274 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8275 *
8276 * [ VC: Root Element Type ]
8277 * The Name in the document type declaration must match the element
8278 * type of the root element.
8279 */
8280
8281 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8282 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8283 const xmlChar *name = NULL;
8284 xmlChar *ExternalID = NULL;
8285 xmlChar *URI = NULL;
8286
8287 /*
8288 * We know that '<!DOCTYPE' has been detected.
8289 */
8290 SKIP(9);
8291
8292 SKIP_BLANKS;
8293
8294 /*
8295 * Parse the DOCTYPE name.
8296 */
8297 name = xmlParseName(ctxt);
8298 if (name == NULL) {
8299 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8300 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8301 }
8302 ctxt->intSubName = name;
8303
8304 SKIP_BLANKS;
8305
8306 /*
8307 * Check for SystemID and ExternalID
8308 */
8309 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8310
8311 if ((URI != NULL) || (ExternalID != NULL)) {
8312 ctxt->hasExternalSubset = 1;
8313 }
8314 ctxt->extSubURI = URI;
8315 ctxt->extSubSystem = ExternalID;
8316
8317 SKIP_BLANKS;
8318
8319 /*
8320 * Create and update the internal subset.
8321 */
8322 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8323 (!ctxt->disableSAX))
8324 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8325 if (ctxt->instate == XML_PARSER_EOF)
8326 return;
8327
8328 /*
8329 * Is there any internal subset declarations ?
8330 * they are handled separately in xmlParseInternalSubset()
8331 */
8332 if (RAW == '[')
8333 return;
8334
8335 /*
8336 * We should be at the end of the DOCTYPE declaration.
8337 */
8338 if (RAW != '>') {
8339 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8340 }
8341 NEXT;
8342 }
8343
8344 /**
8345 * xmlParseInternalSubset:
8346 * @ctxt: an XML parser context
8347 *
8348 * parse the internal subset declaration
8349 *
8350 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8351 */
8352
8353 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8354 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8355 /*
8356 * Is there any DTD definition ?
8357 */
8358 if (RAW == '[') {
8359 int baseInputNr = ctxt->inputNr;
8360 ctxt->instate = XML_PARSER_DTD;
8361 NEXT;
8362 /*
8363 * Parse the succession of Markup declarations and
8364 * PEReferences.
8365 * Subsequence (markupdecl | PEReference | S)*
8366 */
8367 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8368 (ctxt->instate != XML_PARSER_EOF)) {
8369 int id = ctxt->input->id;
8370 unsigned long cons = CUR_CONSUMED;
8371
8372 SKIP_BLANKS;
8373 xmlParseMarkupDecl(ctxt);
8374 xmlParsePEReference(ctxt);
8375
8376 /*
8377 * Conditional sections are allowed from external entities included
8378 * by PE References in the internal subset.
8379 */
8380 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8381 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8382 xmlParseConditionalSections(ctxt);
8383 }
8384
8385 if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8386 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8387 "xmlParseInternalSubset: error detected in Markup declaration\n");
8388 if (ctxt->inputNr > baseInputNr)
8389 xmlPopInput(ctxt);
8390 else
8391 break;
8392 }
8393 }
8394 if (RAW == ']') {
8395 NEXT;
8396 SKIP_BLANKS;
8397 }
8398 }
8399
8400 /*
8401 * We should be at the end of the DOCTYPE declaration.
8402 */
8403 if (RAW != '>') {
8404 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8405 return;
8406 }
8407 NEXT;
8408 }
8409
8410 #ifdef LIBXML_SAX1_ENABLED
8411 /**
8412 * xmlParseAttribute:
8413 * @ctxt: an XML parser context
8414 * @value: a xmlChar ** used to store the value of the attribute
8415 *
8416 * parse an attribute
8417 *
8418 * [41] Attribute ::= Name Eq AttValue
8419 *
8420 * [ WFC: No External Entity References ]
8421 * Attribute values cannot contain direct or indirect entity references
8422 * to external entities.
8423 *
8424 * [ WFC: No < in Attribute Values ]
8425 * The replacement text of any entity referred to directly or indirectly in
8426 * an attribute value (other than "<") must not contain a <.
8427 *
8428 * [ VC: Attribute Value Type ]
8429 * The attribute must have been declared; the value must be of the type
8430 * declared for it.
8431 *
8432 * [25] Eq ::= S? '=' S?
8433 *
8434 * With namespace:
8435 *
8436 * [NS 11] Attribute ::= QName Eq AttValue
8437 *
8438 * Also the case QName == xmlns:??? is handled independently as a namespace
8439 * definition.
8440 *
8441 * Returns the attribute name, and the value in *value.
8442 */
8443
8444 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8445 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8446 const xmlChar *name;
8447 xmlChar *val;
8448
8449 *value = NULL;
8450 GROW;
8451 name = xmlParseName(ctxt);
8452 if (name == NULL) {
8453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8454 "error parsing attribute name\n");
8455 return(NULL);
8456 }
8457
8458 /*
8459 * read the value
8460 */
8461 SKIP_BLANKS;
8462 if (RAW == '=') {
8463 NEXT;
8464 SKIP_BLANKS;
8465 val = xmlParseAttValue(ctxt);
8466 ctxt->instate = XML_PARSER_CONTENT;
8467 } else {
8468 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8469 "Specification mandates value for attribute %s\n", name);
8470 return(NULL);
8471 }
8472
8473 /*
8474 * Check that xml:lang conforms to the specification
8475 * No more registered as an error, just generate a warning now
8476 * since this was deprecated in XML second edition
8477 */
8478 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8479 if (!xmlCheckLanguageID(val)) {
8480 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8481 "Malformed value for xml:lang : %s\n",
8482 val, NULL);
8483 }
8484 }
8485
8486 /*
8487 * Check that xml:space conforms to the specification
8488 */
8489 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8490 if (xmlStrEqual(val, BAD_CAST "default"))
8491 *(ctxt->space) = 0;
8492 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8493 *(ctxt->space) = 1;
8494 else {
8495 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8496 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8497 val, NULL);
8498 }
8499 }
8500
8501 *value = val;
8502 return(name);
8503 }
8504
8505 /**
8506 * xmlParseStartTag:
8507 * @ctxt: an XML parser context
8508 *
8509 * parse a start of tag either for rule element or
8510 * EmptyElement. In both case we don't parse the tag closing chars.
8511 *
8512 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8513 *
8514 * [ WFC: Unique Att Spec ]
8515 * No attribute name may appear more than once in the same start-tag or
8516 * empty-element tag.
8517 *
8518 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8519 *
8520 * [ WFC: Unique Att Spec ]
8521 * No attribute name may appear more than once in the same start-tag or
8522 * empty-element tag.
8523 *
8524 * With namespace:
8525 *
8526 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8527 *
8528 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8529 *
8530 * Returns the element name parsed
8531 */
8532
8533 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8534 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8535 const xmlChar *name;
8536 const xmlChar *attname;
8537 xmlChar *attvalue;
8538 const xmlChar **atts = ctxt->atts;
8539 int nbatts = 0;
8540 int maxatts = ctxt->maxatts;
8541 int i;
8542
8543 if (RAW != '<') return(NULL);
8544 NEXT1;
8545
8546 name = xmlParseName(ctxt);
8547 if (name == NULL) {
8548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8549 "xmlParseStartTag: invalid element name\n");
8550 return(NULL);
8551 }
8552
8553 /*
8554 * Now parse the attributes, it ends up with the ending
8555 *
8556 * (S Attribute)* S?
8557 */
8558 SKIP_BLANKS;
8559 GROW;
8560
8561 while (((RAW != '>') &&
8562 ((RAW != '/') || (NXT(1) != '>')) &&
8563 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8564 int id = ctxt->input->id;
8565 unsigned long cons = CUR_CONSUMED;
8566
8567 attname = xmlParseAttribute(ctxt, &attvalue);
8568 if ((attname != NULL) && (attvalue != NULL)) {
8569 /*
8570 * [ WFC: Unique Att Spec ]
8571 * No attribute name may appear more than once in the same
8572 * start-tag or empty-element tag.
8573 */
8574 for (i = 0; i < nbatts;i += 2) {
8575 if (xmlStrEqual(atts[i], attname)) {
8576 xmlErrAttributeDup(ctxt, NULL, attname);
8577 xmlFree(attvalue);
8578 goto failed;
8579 }
8580 }
8581 /*
8582 * Add the pair to atts
8583 */
8584 if (atts == NULL) {
8585 maxatts = 22; /* allow for 10 attrs by default */
8586 atts = (const xmlChar **)
8587 xmlMalloc(maxatts * sizeof(xmlChar *));
8588 if (atts == NULL) {
8589 xmlErrMemory(ctxt, NULL);
8590 if (attvalue != NULL)
8591 xmlFree(attvalue);
8592 goto failed;
8593 }
8594 ctxt->atts = atts;
8595 ctxt->maxatts = maxatts;
8596 } else if (nbatts + 4 > maxatts) {
8597 const xmlChar **n;
8598
8599 maxatts *= 2;
8600 n = (const xmlChar **) xmlRealloc((void *) atts,
8601 maxatts * sizeof(const xmlChar *));
8602 if (n == NULL) {
8603 xmlErrMemory(ctxt, NULL);
8604 if (attvalue != NULL)
8605 xmlFree(attvalue);
8606 goto failed;
8607 }
8608 atts = n;
8609 ctxt->atts = atts;
8610 ctxt->maxatts = maxatts;
8611 }
8612 atts[nbatts++] = attname;
8613 atts[nbatts++] = attvalue;
8614 atts[nbatts] = NULL;
8615 atts[nbatts + 1] = NULL;
8616 } else {
8617 if (attvalue != NULL)
8618 xmlFree(attvalue);
8619 }
8620
8621 failed:
8622
8623 GROW
8624 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8625 break;
8626 if (SKIP_BLANKS == 0) {
8627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8628 "attributes construct error\n");
8629 }
8630 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8631 (attname == NULL) && (attvalue == NULL)) {
8632 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8633 "xmlParseStartTag: problem parsing attributes\n");
8634 break;
8635 }
8636 SHRINK;
8637 GROW;
8638 }
8639
8640 /*
8641 * SAX: Start of Element !
8642 */
8643 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8644 (!ctxt->disableSAX)) {
8645 if (nbatts > 0)
8646 ctxt->sax->startElement(ctxt->userData, name, atts);
8647 else
8648 ctxt->sax->startElement(ctxt->userData, name, NULL);
8649 }
8650
8651 if (atts != NULL) {
8652 /* Free only the content strings */
8653 for (i = 1;i < nbatts;i+=2)
8654 if (atts[i] != NULL)
8655 xmlFree((xmlChar *) atts[i]);
8656 }
8657 return(name);
8658 }
8659
8660 /**
8661 * xmlParseEndTag1:
8662 * @ctxt: an XML parser context
8663 * @line: line of the start tag
8664 * @nsNr: number of namespaces on the start tag
8665 *
8666 * parse an end of tag
8667 *
8668 * [42] ETag ::= '</' Name S? '>'
8669 *
8670 * With namespace
8671 *
8672 * [NS 9] ETag ::= '</' QName S? '>'
8673 */
8674
8675 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8676 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8677 const xmlChar *name;
8678
8679 GROW;
8680 if ((RAW != '<') || (NXT(1) != '/')) {
8681 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8682 "xmlParseEndTag: '</' not found\n");
8683 return;
8684 }
8685 SKIP(2);
8686
8687 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8688
8689 /*
8690 * We should definitely be at the ending "S? '>'" part
8691 */
8692 GROW;
8693 SKIP_BLANKS;
8694 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8695 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8696 } else
8697 NEXT1;
8698
8699 /*
8700 * [ WFC: Element Type Match ]
8701 * The Name in an element's end-tag must match the element type in the
8702 * start-tag.
8703 *
8704 */
8705 if (name != (xmlChar*)1) {
8706 if (name == NULL) name = BAD_CAST "unparsable";
8707 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8708 "Opening and ending tag mismatch: %s line %d and %s\n",
8709 ctxt->name, line, name);
8710 }
8711
8712 /*
8713 * SAX: End of Tag
8714 */
8715 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8716 (!ctxt->disableSAX))
8717 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8718
8719 namePop(ctxt);
8720 spacePop(ctxt);
8721 return;
8722 }
8723
8724 /**
8725 * xmlParseEndTag:
8726 * @ctxt: an XML parser context
8727 *
8728 * parse an end of tag
8729 *
8730 * [42] ETag ::= '</' Name S? '>'
8731 *
8732 * With namespace
8733 *
8734 * [NS 9] ETag ::= '</' QName S? '>'
8735 */
8736
8737 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8738 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8739 xmlParseEndTag1(ctxt, 0);
8740 }
8741 #endif /* LIBXML_SAX1_ENABLED */
8742
8743 /************************************************************************
8744 * *
8745 * SAX 2 specific operations *
8746 * *
8747 ************************************************************************/
8748
8749 /*
8750 * xmlGetNamespace:
8751 * @ctxt: an XML parser context
8752 * @prefix: the prefix to lookup
8753 *
8754 * Lookup the namespace name for the @prefix (which ca be NULL)
8755 * The prefix must come from the @ctxt->dict dictionary
8756 *
8757 * Returns the namespace name or NULL if not bound
8758 */
8759 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8760 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8761 int i;
8762
8763 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8764 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8765 if (ctxt->nsTab[i] == prefix) {
8766 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8767 return(NULL);
8768 return(ctxt->nsTab[i + 1]);
8769 }
8770 return(NULL);
8771 }
8772
8773 /**
8774 * xmlParseQName:
8775 * @ctxt: an XML parser context
8776 * @prefix: pointer to store the prefix part
8777 *
8778 * parse an XML Namespace QName
8779 *
8780 * [6] QName ::= (Prefix ':')? LocalPart
8781 * [7] Prefix ::= NCName
8782 * [8] LocalPart ::= NCName
8783 *
8784 * Returns the Name parsed or NULL
8785 */
8786
8787 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8788 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8789 const xmlChar *l, *p;
8790
8791 GROW;
8792
8793 l = xmlParseNCName(ctxt);
8794 if (l == NULL) {
8795 if (CUR == ':') {
8796 l = xmlParseName(ctxt);
8797 if (l != NULL) {
8798 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799 "Failed to parse QName '%s'\n", l, NULL, NULL);
8800 *prefix = NULL;
8801 return(l);
8802 }
8803 }
8804 return(NULL);
8805 }
8806 if (CUR == ':') {
8807 NEXT;
8808 p = l;
8809 l = xmlParseNCName(ctxt);
8810 if (l == NULL) {
8811 xmlChar *tmp;
8812
8813 if (ctxt->instate == XML_PARSER_EOF)
8814 return(NULL);
8815 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8816 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8817 l = xmlParseNmtoken(ctxt);
8818 if (l == NULL) {
8819 if (ctxt->instate == XML_PARSER_EOF)
8820 return(NULL);
8821 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8822 } else {
8823 tmp = xmlBuildQName(l, p, NULL, 0);
8824 xmlFree((char *)l);
8825 }
8826 p = xmlDictLookup(ctxt->dict, tmp, -1);
8827 if (tmp != NULL) xmlFree(tmp);
8828 *prefix = NULL;
8829 return(p);
8830 }
8831 if (CUR == ':') {
8832 xmlChar *tmp;
8833
8834 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8835 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8836 NEXT;
8837 tmp = (xmlChar *) xmlParseName(ctxt);
8838 if (tmp != NULL) {
8839 tmp = xmlBuildQName(tmp, l, NULL, 0);
8840 l = xmlDictLookup(ctxt->dict, tmp, -1);
8841 if (tmp != NULL) xmlFree(tmp);
8842 *prefix = p;
8843 return(l);
8844 }
8845 if (ctxt->instate == XML_PARSER_EOF)
8846 return(NULL);
8847 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8848 l = xmlDictLookup(ctxt->dict, tmp, -1);
8849 if (tmp != NULL) xmlFree(tmp);
8850 *prefix = p;
8851 return(l);
8852 }
8853 *prefix = p;
8854 } else
8855 *prefix = NULL;
8856 return(l);
8857 }
8858
8859 /**
8860 * xmlParseQNameAndCompare:
8861 * @ctxt: an XML parser context
8862 * @name: the localname
8863 * @prefix: the prefix, if any.
8864 *
8865 * parse an XML name and compares for match
8866 * (specialized for endtag parsing)
8867 *
8868 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8869 * and the name for mismatch
8870 */
8871
8872 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8873 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8874 xmlChar const *prefix) {
8875 const xmlChar *cmp;
8876 const xmlChar *in;
8877 const xmlChar *ret;
8878 const xmlChar *prefix2;
8879
8880 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8881
8882 GROW;
8883 in = ctxt->input->cur;
8884
8885 cmp = prefix;
8886 while (*in != 0 && *in == *cmp) {
8887 ++in;
8888 ++cmp;
8889 }
8890 if ((*cmp == 0) && (*in == ':')) {
8891 in++;
8892 cmp = name;
8893 while (*in != 0 && *in == *cmp) {
8894 ++in;
8895 ++cmp;
8896 }
8897 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8898 /* success */
8899 ctxt->input->col += in - ctxt->input->cur;
8900 ctxt->input->cur = in;
8901 return((const xmlChar*) 1);
8902 }
8903 }
8904 /*
8905 * all strings coms from the dictionary, equality can be done directly
8906 */
8907 ret = xmlParseQName (ctxt, &prefix2);
8908 if ((ret == name) && (prefix == prefix2))
8909 return((const xmlChar*) 1);
8910 return ret;
8911 }
8912
8913 /**
8914 * xmlParseAttValueInternal:
8915 * @ctxt: an XML parser context
8916 * @len: attribute len result
8917 * @alloc: whether the attribute was reallocated as a new string
8918 * @normalize: if 1 then further non-CDATA normalization must be done
8919 *
8920 * parse a value for an attribute.
8921 * NOTE: if no normalization is needed, the routine will return pointers
8922 * directly from the data buffer.
8923 *
8924 * 3.3.3 Attribute-Value Normalization:
8925 * Before the value of an attribute is passed to the application or
8926 * checked for validity, the XML processor must normalize it as follows:
8927 * - a character reference is processed by appending the referenced
8928 * character to the attribute value
8929 * - an entity reference is processed by recursively processing the
8930 * replacement text of the entity
8931 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8932 * appending #x20 to the normalized value, except that only a single
8933 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8934 * parsed entity or the literal entity value of an internal parsed entity
8935 * - other characters are processed by appending them to the normalized value
8936 * If the declared value is not CDATA, then the XML processor must further
8937 * process the normalized attribute value by discarding any leading and
8938 * trailing space (#x20) characters, and by replacing sequences of space
8939 * (#x20) characters by a single space (#x20) character.
8940 * All attributes for which no declaration has been read should be treated
8941 * by a non-validating parser as if declared CDATA.
8942 *
8943 * Returns the AttValue parsed or NULL. The value has to be freed by the
8944 * caller if it was copied, this can be detected by val[*len] == 0.
8945 */
8946
8947 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8948 const xmlChar *oldbase = ctxt->input->base;\
8949 GROW;\
8950 if (ctxt->instate == XML_PARSER_EOF)\
8951 return(NULL);\
8952 if (oldbase != ctxt->input->base) {\
8953 ptrdiff_t delta = ctxt->input->base - oldbase;\
8954 start = start + delta;\
8955 in = in + delta;\
8956 }\
8957 end = ctxt->input->end;
8958
8959 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8960 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8961 int normalize)
8962 {
8963 xmlChar limit = 0;
8964 const xmlChar *in = NULL, *start, *end, *last;
8965 xmlChar *ret = NULL;
8966 int line, col;
8967 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8968 XML_MAX_HUGE_LENGTH :
8969 XML_MAX_TEXT_LENGTH;
8970
8971 GROW;
8972 in = (xmlChar *) CUR_PTR;
8973 line = ctxt->input->line;
8974 col = ctxt->input->col;
8975 if (*in != '"' && *in != '\'') {
8976 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8977 return (NULL);
8978 }
8979 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8980
8981 /*
8982 * try to handle in this routine the most common case where no
8983 * allocation of a new string is required and where content is
8984 * pure ASCII.
8985 */
8986 limit = *in++;
8987 col++;
8988 end = ctxt->input->end;
8989 start = in;
8990 if (in >= end) {
8991 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992 }
8993 if (normalize) {
8994 /*
8995 * Skip any leading spaces
8996 */
8997 while ((in < end) && (*in != limit) &&
8998 ((*in == 0x20) || (*in == 0x9) ||
8999 (*in == 0xA) || (*in == 0xD))) {
9000 if (*in == 0xA) {
9001 line++; col = 1;
9002 } else {
9003 col++;
9004 }
9005 in++;
9006 start = in;
9007 if (in >= end) {
9008 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9009 if ((in - start) > maxLength) {
9010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9011 "AttValue length too long\n");
9012 return(NULL);
9013 }
9014 }
9015 }
9016 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9017 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9018 col++;
9019 if ((*in++ == 0x20) && (*in == 0x20)) break;
9020 if (in >= end) {
9021 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9022 if ((in - start) > maxLength) {
9023 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9024 "AttValue length too long\n");
9025 return(NULL);
9026 }
9027 }
9028 }
9029 last = in;
9030 /*
9031 * skip the trailing blanks
9032 */
9033 while ((last[-1] == 0x20) && (last > start)) last--;
9034 while ((in < end) && (*in != limit) &&
9035 ((*in == 0x20) || (*in == 0x9) ||
9036 (*in == 0xA) || (*in == 0xD))) {
9037 if (*in == 0xA) {
9038 line++, col = 1;
9039 } else {
9040 col++;
9041 }
9042 in++;
9043 if (in >= end) {
9044 const xmlChar *oldbase = ctxt->input->base;
9045 GROW;
9046 if (ctxt->instate == XML_PARSER_EOF)
9047 return(NULL);
9048 if (oldbase != ctxt->input->base) {
9049 ptrdiff_t delta = ctxt->input->base - oldbase;
9050 start = start + delta;
9051 in = in + delta;
9052 last = last + delta;
9053 }
9054 end = ctxt->input->end;
9055 if ((in - start) > maxLength) {
9056 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9057 "AttValue length too long\n");
9058 return(NULL);
9059 }
9060 }
9061 }
9062 if ((in - start) > maxLength) {
9063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064 "AttValue length too long\n");
9065 return(NULL);
9066 }
9067 if (*in != limit) goto need_complex;
9068 } else {
9069 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9070 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9071 in++;
9072 col++;
9073 if (in >= end) {
9074 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9075 if ((in - start) > maxLength) {
9076 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9077 "AttValue length too long\n");
9078 return(NULL);
9079 }
9080 }
9081 }
9082 last = in;
9083 if ((in - start) > maxLength) {
9084 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9085 "AttValue length too long\n");
9086 return(NULL);
9087 }
9088 if (*in != limit) goto need_complex;
9089 }
9090 in++;
9091 col++;
9092 if (len != NULL) {
9093 *len = last - start;
9094 ret = (xmlChar *) start;
9095 } else {
9096 if (alloc) *alloc = 1;
9097 ret = xmlStrndup(start, last - start);
9098 }
9099 CUR_PTR = in;
9100 ctxt->input->line = line;
9101 ctxt->input->col = col;
9102 if (alloc) *alloc = 0;
9103 return ret;
9104 need_complex:
9105 if (alloc) *alloc = 1;
9106 return xmlParseAttValueComplex(ctxt, len, normalize);
9107 }
9108
9109 /**
9110 * xmlParseAttribute2:
9111 * @ctxt: an XML parser context
9112 * @pref: the element prefix
9113 * @elem: the element name
9114 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9115 * @value: a xmlChar ** used to store the value of the attribute
9116 * @len: an int * to save the length of the attribute
9117 * @alloc: an int * to indicate if the attribute was allocated
9118 *
9119 * parse an attribute in the new SAX2 framework.
9120 *
9121 * Returns the attribute name, and the value in *value, .
9122 */
9123
9124 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9125 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9126 const xmlChar * pref, const xmlChar * elem,
9127 const xmlChar ** prefix, xmlChar ** value,
9128 int *len, int *alloc)
9129 {
9130 const xmlChar *name;
9131 xmlChar *val, *internal_val = NULL;
9132 int normalize = 0;
9133
9134 *value = NULL;
9135 GROW;
9136 name = xmlParseQName(ctxt, prefix);
9137 if (name == NULL) {
9138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9139 "error parsing attribute name\n");
9140 return (NULL);
9141 }
9142
9143 /*
9144 * get the type if needed
9145 */
9146 if (ctxt->attsSpecial != NULL) {
9147 int type;
9148
9149 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9150 pref, elem, *prefix, name);
9151 if (type != 0)
9152 normalize = 1;
9153 }
9154
9155 /*
9156 * read the value
9157 */
9158 SKIP_BLANKS;
9159 if (RAW == '=') {
9160 NEXT;
9161 SKIP_BLANKS;
9162 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9163 if (normalize) {
9164 /*
9165 * Sometimes a second normalisation pass for spaces is needed
9166 * but that only happens if charrefs or entities references
9167 * have been used in the attribute value, i.e. the attribute
9168 * value have been extracted in an allocated string already.
9169 */
9170 if (*alloc) {
9171 const xmlChar *val2;
9172
9173 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9174 if ((val2 != NULL) && (val2 != val)) {
9175 xmlFree(val);
9176 val = (xmlChar *) val2;
9177 }
9178 }
9179 }
9180 ctxt->instate = XML_PARSER_CONTENT;
9181 } else {
9182 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9183 "Specification mandates value for attribute %s\n",
9184 name);
9185 return (NULL);
9186 }
9187
9188 if (*prefix == ctxt->str_xml) {
9189 /*
9190 * Check that xml:lang conforms to the specification
9191 * No more registered as an error, just generate a warning now
9192 * since this was deprecated in XML second edition
9193 */
9194 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9195 internal_val = xmlStrndup(val, *len);
9196 if (!xmlCheckLanguageID(internal_val)) {
9197 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9198 "Malformed value for xml:lang : %s\n",
9199 internal_val, NULL);
9200 }
9201 }
9202
9203 /*
9204 * Check that xml:space conforms to the specification
9205 */
9206 if (xmlStrEqual(name, BAD_CAST "space")) {
9207 internal_val = xmlStrndup(val, *len);
9208 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9209 *(ctxt->space) = 0;
9210 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9211 *(ctxt->space) = 1;
9212 else {
9213 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9214 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215 internal_val, NULL);
9216 }
9217 }
9218 if (internal_val) {
9219 xmlFree(internal_val);
9220 }
9221 }
9222
9223 *value = val;
9224 return (name);
9225 }
9226 /**
9227 * xmlParseStartTag2:
9228 * @ctxt: an XML parser context
9229 *
9230 * parse a start of tag either for rule element or
9231 * EmptyElement. In both case we don't parse the tag closing chars.
9232 * This routine is called when running SAX2 parsing
9233 *
9234 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9235 *
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
9238 * empty-element tag.
9239 *
9240 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9241 *
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
9244 * empty-element tag.
9245 *
9246 * With namespace:
9247 *
9248 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9249 *
9250 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9251 *
9252 * Returns the element name parsed
9253 */
9254
9255 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9256 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9257 const xmlChar **URI, int *tlen) {
9258 const xmlChar *localname;
9259 const xmlChar *prefix;
9260 const xmlChar *attname;
9261 const xmlChar *aprefix;
9262 const xmlChar *nsname;
9263 xmlChar *attvalue;
9264 const xmlChar **atts = ctxt->atts;
9265 int maxatts = ctxt->maxatts;
9266 int nratts, nbatts, nbdef, inputid;
9267 int i, j, nbNs, attval;
9268 unsigned long cur;
9269 int nsNr = ctxt->nsNr;
9270
9271 if (RAW != '<') return(NULL);
9272 NEXT1;
9273
9274 /*
9275 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276 * point since the attribute values may be stored as pointers to
9277 * the buffer and calling SHRINK would destroy them !
9278 * The Shrinking is only possible once the full set of attribute
9279 * callbacks have been done.
9280 */
9281 SHRINK;
9282 cur = ctxt->input->cur - ctxt->input->base;
9283 inputid = ctxt->input->id;
9284 nbatts = 0;
9285 nratts = 0;
9286 nbdef = 0;
9287 nbNs = 0;
9288 attval = 0;
9289 /* Forget any namespaces added during an earlier parse of this element. */
9290 ctxt->nsNr = nsNr;
9291
9292 localname = xmlParseQName(ctxt, &prefix);
9293 if (localname == NULL) {
9294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295 "StartTag: invalid element name\n");
9296 return(NULL);
9297 }
9298 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9299
9300 /*
9301 * Now parse the attributes, it ends up with the ending
9302 *
9303 * (S Attribute)* S?
9304 */
9305 SKIP_BLANKS;
9306 GROW;
9307
9308 while (((RAW != '>') &&
9309 ((RAW != '/') || (NXT(1) != '>')) &&
9310 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9311 int id = ctxt->input->id;
9312 unsigned long cons = CUR_CONSUMED;
9313 int len = -1, alloc = 0;
9314
9315 attname = xmlParseAttribute2(ctxt, prefix, localname,
9316 &aprefix, &attvalue, &len, &alloc);
9317 if ((attname == NULL) || (attvalue == NULL))
9318 goto next_attr;
9319 if (len < 0) len = xmlStrlen(attvalue);
9320
9321 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9322 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9323 xmlURIPtr uri;
9324
9325 if (URL == NULL) {
9326 xmlErrMemory(ctxt, "dictionary allocation failure");
9327 if ((attvalue != NULL) && (alloc != 0))
9328 xmlFree(attvalue);
9329 localname = NULL;
9330 goto done;
9331 }
9332 if (*URL != 0) {
9333 uri = xmlParseURI((const char *) URL);
9334 if (uri == NULL) {
9335 xmlNsErr(ctxt, XML_WAR_NS_URI,
9336 "xmlns: '%s' is not a valid URI\n",
9337 URL, NULL, NULL);
9338 } else {
9339 if (uri->scheme == NULL) {
9340 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9341 "xmlns: URI %s is not absolute\n",
9342 URL, NULL, NULL);
9343 }
9344 xmlFreeURI(uri);
9345 }
9346 if (URL == ctxt->str_xml_ns) {
9347 if (attname != ctxt->str_xml) {
9348 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349 "xml namespace URI cannot be the default namespace\n",
9350 NULL, NULL, NULL);
9351 }
9352 goto next_attr;
9353 }
9354 if ((len == 29) &&
9355 (xmlStrEqual(URL,
9356 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9357 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9358 "reuse of the xmlns namespace name is forbidden\n",
9359 NULL, NULL, NULL);
9360 goto next_attr;
9361 }
9362 }
9363 /*
9364 * check that it's not a defined namespace
9365 */
9366 for (j = 1;j <= nbNs;j++)
9367 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9368 break;
9369 if (j <= nbNs)
9370 xmlErrAttributeDup(ctxt, NULL, attname);
9371 else
9372 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9373
9374 } else if (aprefix == ctxt->str_xmlns) {
9375 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9376 xmlURIPtr uri;
9377
9378 if (attname == ctxt->str_xml) {
9379 if (URL != ctxt->str_xml_ns) {
9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381 "xml namespace prefix mapped to wrong URI\n",
9382 NULL, NULL, NULL);
9383 }
9384 /*
9385 * Do not keep a namespace definition node
9386 */
9387 goto next_attr;
9388 }
9389 if (URL == ctxt->str_xml_ns) {
9390 if (attname != ctxt->str_xml) {
9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392 "xml namespace URI mapped to wrong prefix\n",
9393 NULL, NULL, NULL);
9394 }
9395 goto next_attr;
9396 }
9397 if (attname == ctxt->str_xmlns) {
9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "redefinition of the xmlns prefix is forbidden\n",
9400 NULL, NULL, NULL);
9401 goto next_attr;
9402 }
9403 if ((len == 29) &&
9404 (xmlStrEqual(URL,
9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "reuse of the xmlns namespace name is forbidden\n",
9408 NULL, NULL, NULL);
9409 goto next_attr;
9410 }
9411 if ((URL == NULL) || (URL[0] == 0)) {
9412 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9413 "xmlns:%s: Empty XML namespace is not allowed\n",
9414 attname, NULL, NULL);
9415 goto next_attr;
9416 } else {
9417 uri = xmlParseURI((const char *) URL);
9418 if (uri == NULL) {
9419 xmlNsErr(ctxt, XML_WAR_NS_URI,
9420 "xmlns:%s: '%s' is not a valid URI\n",
9421 attname, URL, NULL);
9422 } else {
9423 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9424 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9425 "xmlns:%s: URI %s is not absolute\n",
9426 attname, URL, NULL);
9427 }
9428 xmlFreeURI(uri);
9429 }
9430 }
9431
9432 /*
9433 * check that it's not a defined namespace
9434 */
9435 for (j = 1;j <= nbNs;j++)
9436 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9437 break;
9438 if (j <= nbNs)
9439 xmlErrAttributeDup(ctxt, aprefix, attname);
9440 else
9441 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9442
9443 } else {
9444 /*
9445 * Add the pair to atts
9446 */
9447 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9448 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9449 goto next_attr;
9450 }
9451 maxatts = ctxt->maxatts;
9452 atts = ctxt->atts;
9453 }
9454 ctxt->attallocs[nratts++] = alloc;
9455 atts[nbatts++] = attname;
9456 atts[nbatts++] = aprefix;
9457 /*
9458 * The namespace URI field is used temporarily to point at the
9459 * base of the current input buffer for non-alloced attributes.
9460 * When the input buffer is reallocated, all the pointers become
9461 * invalid, but they can be reconstructed later.
9462 */
9463 if (alloc)
9464 atts[nbatts++] = NULL;
9465 else
9466 atts[nbatts++] = ctxt->input->base;
9467 atts[nbatts++] = attvalue;
9468 attvalue += len;
9469 atts[nbatts++] = attvalue;
9470 /*
9471 * tag if some deallocation is needed
9472 */
9473 if (alloc != 0) attval = 1;
9474 attvalue = NULL; /* moved into atts */
9475 }
9476
9477 next_attr:
9478 if ((attvalue != NULL) && (alloc != 0)) {
9479 xmlFree(attvalue);
9480 attvalue = NULL;
9481 }
9482
9483 GROW
9484 if (ctxt->instate == XML_PARSER_EOF)
9485 break;
9486 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9487 break;
9488 if (SKIP_BLANKS == 0) {
9489 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9490 "attributes construct error\n");
9491 break;
9492 }
9493 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9494 (attname == NULL) && (attvalue == NULL)) {
9495 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9496 "xmlParseStartTag: problem parsing attributes\n");
9497 break;
9498 }
9499 GROW;
9500 }
9501
9502 if (ctxt->input->id != inputid) {
9503 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9504 "Unexpected change of input\n");
9505 localname = NULL;
9506 goto done;
9507 }
9508
9509 /* Reconstruct attribute value pointers. */
9510 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9511 if (atts[i+2] != NULL) {
9512 /*
9513 * Arithmetic on dangling pointers is technically undefined
9514 * behavior, but well...
9515 */
9516 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9517 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9518 atts[i+3] += offset; /* value */
9519 atts[i+4] += offset; /* valuend */
9520 }
9521 }
9522
9523 /*
9524 * The attributes defaulting
9525 */
9526 if (ctxt->attsDefault != NULL) {
9527 xmlDefAttrsPtr defaults;
9528
9529 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9530 if (defaults != NULL) {
9531 for (i = 0;i < defaults->nbAttrs;i++) {
9532 attname = defaults->values[5 * i];
9533 aprefix = defaults->values[5 * i + 1];
9534
9535 /*
9536 * special work for namespaces defaulted defs
9537 */
9538 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9539 /*
9540 * check that it's not a defined namespace
9541 */
9542 for (j = 1;j <= nbNs;j++)
9543 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9544 break;
9545 if (j <= nbNs) continue;
9546
9547 nsname = xmlGetNamespace(ctxt, NULL);
9548 if (nsname != defaults->values[5 * i + 2]) {
9549 if (nsPush(ctxt, NULL,
9550 defaults->values[5 * i + 2]) > 0)
9551 nbNs++;
9552 }
9553 } else if (aprefix == ctxt->str_xmlns) {
9554 /*
9555 * check that it's not a defined namespace
9556 */
9557 for (j = 1;j <= nbNs;j++)
9558 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9559 break;
9560 if (j <= nbNs) continue;
9561
9562 nsname = xmlGetNamespace(ctxt, attname);
9563 if (nsname != defaults->values[2]) {
9564 if (nsPush(ctxt, attname,
9565 defaults->values[5 * i + 2]) > 0)
9566 nbNs++;
9567 }
9568 } else {
9569 /*
9570 * check that it's not a defined attribute
9571 */
9572 for (j = 0;j < nbatts;j+=5) {
9573 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9574 break;
9575 }
9576 if (j < nbatts) continue;
9577
9578 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9579 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9580 localname = NULL;
9581 goto done;
9582 }
9583 maxatts = ctxt->maxatts;
9584 atts = ctxt->atts;
9585 }
9586 atts[nbatts++] = attname;
9587 atts[nbatts++] = aprefix;
9588 if (aprefix == NULL)
9589 atts[nbatts++] = NULL;
9590 else
9591 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9592 atts[nbatts++] = defaults->values[5 * i + 2];
9593 atts[nbatts++] = defaults->values[5 * i + 3];
9594 if ((ctxt->standalone == 1) &&
9595 (defaults->values[5 * i + 4] != NULL)) {
9596 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9597 "standalone: attribute %s on %s defaulted from external subset\n",
9598 attname, localname);
9599 }
9600 nbdef++;
9601 }
9602 }
9603 }
9604 }
9605
9606 /*
9607 * The attributes checkings
9608 */
9609 for (i = 0; i < nbatts;i += 5) {
9610 /*
9611 * The default namespace does not apply to attribute names.
9612 */
9613 if (atts[i + 1] != NULL) {
9614 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9615 if (nsname == NULL) {
9616 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9617 "Namespace prefix %s for %s on %s is not defined\n",
9618 atts[i + 1], atts[i], localname);
9619 }
9620 atts[i + 2] = nsname;
9621 } else
9622 nsname = NULL;
9623 /*
9624 * [ WFC: Unique Att Spec ]
9625 * No attribute name may appear more than once in the same
9626 * start-tag or empty-element tag.
9627 * As extended by the Namespace in XML REC.
9628 */
9629 for (j = 0; j < i;j += 5) {
9630 if (atts[i] == atts[j]) {
9631 if (atts[i+1] == atts[j+1]) {
9632 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9633 break;
9634 }
9635 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9636 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9637 "Namespaced Attribute %s in '%s' redefined\n",
9638 atts[i], nsname, NULL);
9639 break;
9640 }
9641 }
9642 }
9643 }
9644
9645 nsname = xmlGetNamespace(ctxt, prefix);
9646 if ((prefix != NULL) && (nsname == NULL)) {
9647 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9648 "Namespace prefix %s on %s is not defined\n",
9649 prefix, localname, NULL);
9650 }
9651 *pref = prefix;
9652 *URI = nsname;
9653
9654 /*
9655 * SAX: Start of Element !
9656 */
9657 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9658 (!ctxt->disableSAX)) {
9659 if (nbNs > 0)
9660 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9661 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9662 nbatts / 5, nbdef, atts);
9663 else
9664 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9665 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9666 }
9667
9668 done:
9669 /*
9670 * Free up attribute allocated strings if needed
9671 */
9672 if (attval != 0) {
9673 for (i = 3,j = 0; j < nratts;i += 5,j++)
9674 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9675 xmlFree((xmlChar *) atts[i]);
9676 }
9677
9678 return(localname);
9679 }
9680
9681 /**
9682 * xmlParseEndTag2:
9683 * @ctxt: an XML parser context
9684 * @line: line of the start tag
9685 * @nsNr: number of namespaces on the start tag
9686 *
9687 * parse an end of tag
9688 *
9689 * [42] ETag ::= '</' Name S? '>'
9690 *
9691 * With namespace
9692 *
9693 * [NS 9] ETag ::= '</' QName S? '>'
9694 */
9695
9696 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9697 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9698 const xmlChar *name;
9699
9700 GROW;
9701 if ((RAW != '<') || (NXT(1) != '/')) {
9702 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9703 return;
9704 }
9705 SKIP(2);
9706
9707 if (tag->prefix == NULL)
9708 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9709 else
9710 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9711
9712 /*
9713 * We should definitely be at the ending "S? '>'" part
9714 */
9715 GROW;
9716 if (ctxt->instate == XML_PARSER_EOF)
9717 return;
9718 SKIP_BLANKS;
9719 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9720 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9721 } else
9722 NEXT1;
9723
9724 /*
9725 * [ WFC: Element Type Match ]
9726 * The Name in an element's end-tag must match the element type in the
9727 * start-tag.
9728 *
9729 */
9730 if (name != (xmlChar*)1) {
9731 if (name == NULL) name = BAD_CAST "unparsable";
9732 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9733 "Opening and ending tag mismatch: %s line %d and %s\n",
9734 ctxt->name, tag->line, name);
9735 }
9736
9737 /*
9738 * SAX: End of Tag
9739 */
9740 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9741 (!ctxt->disableSAX))
9742 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9743 tag->URI);
9744
9745 spacePop(ctxt);
9746 if (tag->nsNr != 0)
9747 nsPop(ctxt, tag->nsNr);
9748 }
9749
9750 /**
9751 * xmlParseCDSect:
9752 * @ctxt: an XML parser context
9753 *
9754 * Parse escaped pure raw content.
9755 *
9756 * [18] CDSect ::= CDStart CData CDEnd
9757 *
9758 * [19] CDStart ::= '<![CDATA['
9759 *
9760 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9761 *
9762 * [21] CDEnd ::= ']]>'
9763 */
9764 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9765 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9766 xmlChar *buf = NULL;
9767 int len = 0;
9768 int size = XML_PARSER_BUFFER_SIZE;
9769 int r, rl;
9770 int s, sl;
9771 int cur, l;
9772 int count = 0;
9773 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9774 XML_MAX_HUGE_LENGTH :
9775 XML_MAX_TEXT_LENGTH;
9776
9777 /* Check 2.6.0 was NXT(0) not RAW */
9778 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9779 SKIP(9);
9780 } else
9781 return;
9782
9783 ctxt->instate = XML_PARSER_CDATA_SECTION;
9784 r = CUR_CHAR(rl);
9785 if (!IS_CHAR(r)) {
9786 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9787 ctxt->instate = XML_PARSER_CONTENT;
9788 return;
9789 }
9790 NEXTL(rl);
9791 s = CUR_CHAR(sl);
9792 if (!IS_CHAR(s)) {
9793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794 ctxt->instate = XML_PARSER_CONTENT;
9795 return;
9796 }
9797 NEXTL(sl);
9798 cur = CUR_CHAR(l);
9799 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9800 if (buf == NULL) {
9801 xmlErrMemory(ctxt, NULL);
9802 return;
9803 }
9804 while (IS_CHAR(cur) &&
9805 ((r != ']') || (s != ']') || (cur != '>'))) {
9806 if (len + 5 >= size) {
9807 xmlChar *tmp;
9808
9809 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9810 if (tmp == NULL) {
9811 xmlFree(buf);
9812 xmlErrMemory(ctxt, NULL);
9813 return;
9814 }
9815 buf = tmp;
9816 size *= 2;
9817 }
9818 COPY_BUF(rl,buf,len,r);
9819 r = s;
9820 rl = sl;
9821 s = cur;
9822 sl = l;
9823 count++;
9824 if (count > 50) {
9825 SHRINK;
9826 GROW;
9827 if (ctxt->instate == XML_PARSER_EOF) {
9828 xmlFree(buf);
9829 return;
9830 }
9831 count = 0;
9832 }
9833 NEXTL(l);
9834 cur = CUR_CHAR(l);
9835 if (len > maxLength) {
9836 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837 "CData section too big found\n");
9838 xmlFree(buf);
9839 return;
9840 }
9841 }
9842 buf[len] = 0;
9843 ctxt->instate = XML_PARSER_CONTENT;
9844 if (cur != '>') {
9845 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9846 "CData section not finished\n%.50s\n", buf);
9847 xmlFree(buf);
9848 return;
9849 }
9850 NEXTL(l);
9851
9852 /*
9853 * OK the buffer is to be consumed as cdata.
9854 */
9855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9856 if (ctxt->sax->cdataBlock != NULL)
9857 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9858 else if (ctxt->sax->characters != NULL)
9859 ctxt->sax->characters(ctxt->userData, buf, len);
9860 }
9861 xmlFree(buf);
9862 }
9863
9864 /**
9865 * xmlParseContentInternal:
9866 * @ctxt: an XML parser context
9867 *
9868 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9869 * unexpected EOF to the caller.
9870 */
9871
9872 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9873 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9874 int nameNr = ctxt->nameNr;
9875
9876 GROW;
9877 while ((RAW != 0) &&
9878 (ctxt->instate != XML_PARSER_EOF)) {
9879 int id = ctxt->input->id;
9880 unsigned long cons = CUR_CONSUMED;
9881 const xmlChar *cur = ctxt->input->cur;
9882
9883 /*
9884 * First case : a Processing Instruction.
9885 */
9886 if ((*cur == '<') && (cur[1] == '?')) {
9887 xmlParsePI(ctxt);
9888 }
9889
9890 /*
9891 * Second case : a CDSection
9892 */
9893 /* 2.6.0 test was *cur not RAW */
9894 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9895 xmlParseCDSect(ctxt);
9896 }
9897
9898 /*
9899 * Third case : a comment
9900 */
9901 else if ((*cur == '<') && (NXT(1) == '!') &&
9902 (NXT(2) == '-') && (NXT(3) == '-')) {
9903 xmlParseComment(ctxt);
9904 ctxt->instate = XML_PARSER_CONTENT;
9905 }
9906
9907 /*
9908 * Fourth case : a sub-element.
9909 */
9910 else if (*cur == '<') {
9911 if (NXT(1) == '/') {
9912 if (ctxt->nameNr <= nameNr)
9913 break;
9914 xmlParseElementEnd(ctxt);
9915 } else {
9916 xmlParseElementStart(ctxt);
9917 }
9918 }
9919
9920 /*
9921 * Fifth case : a reference. If if has not been resolved,
9922 * parsing returns it's Name, create the node
9923 */
9924
9925 else if (*cur == '&') {
9926 xmlParseReference(ctxt);
9927 }
9928
9929 /*
9930 * Last case, text. Note that References are handled directly.
9931 */
9932 else {
9933 xmlParseCharData(ctxt, 0);
9934 }
9935
9936 GROW;
9937 SHRINK;
9938
9939 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9940 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9941 "detected an error in element content\n");
9942 xmlHaltParser(ctxt);
9943 break;
9944 }
9945 }
9946 }
9947
9948 /**
9949 * xmlParseContent:
9950 * @ctxt: an XML parser context
9951 *
9952 * Parse a content sequence. Stops at EOF or '</'.
9953 *
9954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9955 */
9956
9957 void
xmlParseContent(xmlParserCtxtPtr ctxt)9958 xmlParseContent(xmlParserCtxtPtr ctxt) {
9959 int nameNr = ctxt->nameNr;
9960
9961 xmlParseContentInternal(ctxt);
9962
9963 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9964 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9965 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9966 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9967 "Premature end of data in tag %s line %d\n",
9968 name, line, NULL);
9969 }
9970 }
9971
9972 /**
9973 * xmlParseElement:
9974 * @ctxt: an XML parser context
9975 *
9976 * parse an XML element
9977 *
9978 * [39] element ::= EmptyElemTag | STag content ETag
9979 *
9980 * [ WFC: Element Type Match ]
9981 * The Name in an element's end-tag must match the element type in the
9982 * start-tag.
9983 *
9984 */
9985
9986 void
xmlParseElement(xmlParserCtxtPtr ctxt)9987 xmlParseElement(xmlParserCtxtPtr ctxt) {
9988 if (xmlParseElementStart(ctxt) != 0)
9989 return;
9990
9991 xmlParseContentInternal(ctxt);
9992 if (ctxt->instate == XML_PARSER_EOF)
9993 return;
9994
9995 if (CUR == 0) {
9996 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9997 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9999 "Premature end of data in tag %s line %d\n",
10000 name, line, NULL);
10001 return;
10002 }
10003
10004 xmlParseElementEnd(ctxt);
10005 }
10006
/**
 * xmlParseElementStart:
 * @ctxt:  an XML parser context
 *
 * Parse the start of an XML element: the start tag, or a whole empty
 * element tag.
 *
 * On success with a regular start tag, an entry for the element stays
 * on the name stack (nameNsPush) and on the space stack (spacePush).
 * For an empty element, and on the "missing '>'" error path, the name,
 * space and namespace entries pushed here are popped again before
 * returning.
 *
 * Returns -1 in case of error, 0 if an opening tag was parsed, 1 if an
 * empty element was parsed.
 */
static int
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    const xmlChar *prefix = NULL;
    const xmlChar *URI = NULL;
    xmlParserNodeInfo node_info;
    int line, tlen = 0;
    xmlNodePtr ret;
    /* namespace stack height on entry, used to undo declarations */
    int nsNr = ctxt->nsNr;

    /* refuse overly deep nesting unless XML_PARSE_HUGE is set */
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        xmlHaltParser(ctxt);
        return(-1);
    }

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /*
     * Push a space-stack entry for this element: -1 when the stack is
     * empty or its current entry is -2, otherwise a copy of the current
     * entry.
     */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else if (*ctxt->space == -2)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    /* line of the start tag, kept for error messages */
    line = ctxt->input->line;
#ifdef LIBXML_SAX1_ENABLED
    if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
    else
        name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (name == NULL) {
        /* no start tag could be parsed: undo the spacePush above */
        spacePop(ctxt);
        return(-1);
    }
    /* the last argument is the number of namespace entries added by this tag */
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
    /* current node, remembered for the node-info records added below */
    ret = ctxt->node;

#ifdef LIBXML_VALID_ENABLED
    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        /* an empty element gets its end callback right away */
        if (ctxt->sax2) {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
#ifdef LIBXML_SAX1_ENABLED
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
        }
        /* unwind everything pushed for this element */
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return(1);
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
                     "Couldn't find end of Start Tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return(-1);
    }

    /* regular start tag: the element stays open for the caller */
    return(0);
}
10136
10137 /**
10138 * xmlParseElementEnd:
10139 * @ctxt: an XML parser context
10140 *
10141 * Parse the end of an XML element.
10142 */
10143 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10144 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10145 xmlParserNodeInfo node_info;
10146 xmlNodePtr ret = ctxt->node;
10147
10148 if (ctxt->nameNr <= 0)
10149 return;
10150
10151 /*
10152 * parse the end of tag: '</' should be here.
10153 */
10154 if (ctxt->sax2) {
10155 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10156 namePop(ctxt);
10157 }
10158 #ifdef LIBXML_SAX1_ENABLED
10159 else
10160 xmlParseEndTag1(ctxt, 0);
10161 #endif /* LIBXML_SAX1_ENABLED */
10162
10163 /*
10164 * Capture end position and add node
10165 */
10166 if ( ret != NULL && ctxt->record_info ) {
10167 node_info.end_pos = ctxt->input->consumed +
10168 (CUR_PTR - ctxt->input->base);
10169 node_info.end_line = ctxt->input->line;
10170 node_info.node = ret;
10171 xmlParserAddNodeInfo(ctxt, &node_info);
10172 }
10173 }
10174
10175 /**
10176 * xmlParseVersionNum:
10177 * @ctxt: an XML parser context
10178 *
10179 * parse the XML version value.
10180 *
10181 * [26] VersionNum ::= '1.' [0-9]+
10182 *
10183 * In practice allow [0-9].[0-9]+ at that level
10184 *
10185 * Returns the string giving the XML version number, or NULL
10186 */
10187 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10188 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10189 xmlChar *buf = NULL;
10190 int len = 0;
10191 int size = 10;
10192 xmlChar cur;
10193
10194 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10195 if (buf == NULL) {
10196 xmlErrMemory(ctxt, NULL);
10197 return(NULL);
10198 }
10199 cur = CUR;
10200 if (!((cur >= '0') && (cur <= '9'))) {
10201 xmlFree(buf);
10202 return(NULL);
10203 }
10204 buf[len++] = cur;
10205 NEXT;
10206 cur=CUR;
10207 if (cur != '.') {
10208 xmlFree(buf);
10209 return(NULL);
10210 }
10211 buf[len++] = cur;
10212 NEXT;
10213 cur=CUR;
10214 while ((cur >= '0') && (cur <= '9')) {
10215 if (len + 1 >= size) {
10216 xmlChar *tmp;
10217
10218 size *= 2;
10219 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10220 if (tmp == NULL) {
10221 xmlFree(buf);
10222 xmlErrMemory(ctxt, NULL);
10223 return(NULL);
10224 }
10225 buf = tmp;
10226 }
10227 buf[len++] = cur;
10228 NEXT;
10229 cur=CUR;
10230 }
10231 buf[len] = 0;
10232 return(buf);
10233 }
10234
10235 /**
10236 * xmlParseVersionInfo:
10237 * @ctxt: an XML parser context
10238 *
10239 * parse the XML version.
10240 *
10241 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10242 *
10243 * [25] Eq ::= S? '=' S?
10244 *
10245 * Returns the version string, e.g. "1.0"
10246 */
10247
10248 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10249 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10250 xmlChar *version = NULL;
10251
10252 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10253 SKIP(7);
10254 SKIP_BLANKS;
10255 if (RAW != '=') {
10256 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257 return(NULL);
10258 }
10259 NEXT;
10260 SKIP_BLANKS;
10261 if (RAW == '"') {
10262 NEXT;
10263 version = xmlParseVersionNum(ctxt);
10264 if (RAW != '"') {
10265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10266 } else
10267 NEXT;
10268 } else if (RAW == '\''){
10269 NEXT;
10270 version = xmlParseVersionNum(ctxt);
10271 if (RAW != '\'') {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273 } else
10274 NEXT;
10275 } else {
10276 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10277 }
10278 }
10279 return(version);
10280 }
10281
10282 /**
10283 * xmlParseEncName:
10284 * @ctxt: an XML parser context
10285 *
10286 * parse the XML encoding name
10287 *
10288 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10289 *
10290 * Returns the encoding name value or NULL
10291 */
10292 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10293 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10294 xmlChar *buf = NULL;
10295 int len = 0;
10296 int size = 10;
10297 xmlChar cur;
10298
10299 cur = CUR;
10300 if (((cur >= 'a') && (cur <= 'z')) ||
10301 ((cur >= 'A') && (cur <= 'Z'))) {
10302 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10303 if (buf == NULL) {
10304 xmlErrMemory(ctxt, NULL);
10305 return(NULL);
10306 }
10307
10308 buf[len++] = cur;
10309 NEXT;
10310 cur = CUR;
10311 while (((cur >= 'a') && (cur <= 'z')) ||
10312 ((cur >= 'A') && (cur <= 'Z')) ||
10313 ((cur >= '0') && (cur <= '9')) ||
10314 (cur == '.') || (cur == '_') ||
10315 (cur == '-')) {
10316 if (len + 1 >= size) {
10317 xmlChar *tmp;
10318
10319 size *= 2;
10320 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10321 if (tmp == NULL) {
10322 xmlErrMemory(ctxt, NULL);
10323 xmlFree(buf);
10324 return(NULL);
10325 }
10326 buf = tmp;
10327 }
10328 buf[len++] = cur;
10329 NEXT;
10330 cur = CUR;
10331 if (cur == 0) {
10332 SHRINK;
10333 GROW;
10334 cur = CUR;
10335 }
10336 }
10337 buf[len] = 0;
10338 } else {
10339 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10340 }
10341 return(buf);
10342 }
10343
10344 /**
10345 * xmlParseEncodingDecl:
10346 * @ctxt: an XML parser context
10347 *
10348 * parse the XML encoding declaration
10349 *
10350 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10351 *
10352 * this setups the conversion filters.
10353 *
10354 * Returns the encoding value or NULL
10355 */
10356
10357 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10358 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10359 xmlChar *encoding = NULL;
10360
10361 SKIP_BLANKS;
10362 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10363 SKIP(8);
10364 SKIP_BLANKS;
10365 if (RAW != '=') {
10366 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10367 return(NULL);
10368 }
10369 NEXT;
10370 SKIP_BLANKS;
10371 if (RAW == '"') {
10372 NEXT;
10373 encoding = xmlParseEncName(ctxt);
10374 if (RAW != '"') {
10375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376 xmlFree((xmlChar *) encoding);
10377 return(NULL);
10378 } else
10379 NEXT;
10380 } else if (RAW == '\''){
10381 NEXT;
10382 encoding = xmlParseEncName(ctxt);
10383 if (RAW != '\'') {
10384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10385 xmlFree((xmlChar *) encoding);
10386 return(NULL);
10387 } else
10388 NEXT;
10389 } else {
10390 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10391 }
10392
10393 /*
10394 * Non standard parsing, allowing the user to ignore encoding
10395 */
10396 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10397 xmlFree((xmlChar *) encoding);
10398 return(NULL);
10399 }
10400
10401 /*
10402 * UTF-16 encoding switch has already taken place at this stage,
10403 * more over the little-endian/big-endian selection is already done
10404 */
10405 if ((encoding != NULL) &&
10406 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10407 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10408 /*
10409 * If no encoding was passed to the parser, that we are
10410 * using UTF-16 and no decoder is present i.e. the
10411 * document is apparently UTF-8 compatible, then raise an
10412 * encoding mismatch fatal error
10413 */
10414 if ((ctxt->encoding == NULL) &&
10415 (ctxt->input->buf != NULL) &&
10416 (ctxt->input->buf->encoder == NULL)) {
10417 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10418 "Document labelled UTF-16 but has UTF-8 content\n");
10419 }
10420 if (ctxt->encoding != NULL)
10421 xmlFree((xmlChar *) ctxt->encoding);
10422 ctxt->encoding = encoding;
10423 }
10424 /*
10425 * UTF-8 encoding is handled natively
10426 */
10427 else if ((encoding != NULL) &&
10428 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10429 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10430 if (ctxt->encoding != NULL)
10431 xmlFree((xmlChar *) ctxt->encoding);
10432 ctxt->encoding = encoding;
10433 }
10434 else if (encoding != NULL) {
10435 xmlCharEncodingHandlerPtr handler;
10436
10437 if (ctxt->input->encoding != NULL)
10438 xmlFree((xmlChar *) ctxt->input->encoding);
10439 ctxt->input->encoding = encoding;
10440
10441 handler = xmlFindCharEncodingHandler((const char *) encoding);
10442 if (handler != NULL) {
10443 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10444 /* failed to convert */
10445 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10446 return(NULL);
10447 }
10448 } else {
10449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10450 "Unsupported encoding %s\n", encoding);
10451 return(NULL);
10452 }
10453 }
10454 }
10455 return(encoding);
10456 }
10457
10458 /**
10459 * xmlParseSDDecl:
10460 * @ctxt: an XML parser context
10461 *
10462 * parse the XML standalone declaration
10463 *
10464 * [32] SDDecl ::= S 'standalone' Eq
10465 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10466 *
10467 * [ VC: Standalone Document Declaration ]
10468 * TODO The standalone document declaration must have the value "no"
10469 * if any external markup declarations contain declarations of:
10470 * - attributes with default values, if elements to which these
10471 * attributes apply appear in the document without specifications
10472 * of values for these attributes, or
10473 * - entities (other than amp, lt, gt, apos, quot), if references
10474 * to those entities appear in the document, or
10475 * - attributes with values subject to normalization, where the
10476 * attribute appears in the document with a value which will change
10477 * as a result of normalization, or
10478 * - element types with element content, if white space occurs directly
10479 * within any instance of those types.
10480 *
10481 * Returns:
10482 * 1 if standalone="yes"
10483 * 0 if standalone="no"
10484 * -2 if standalone attribute is missing or invalid
10485 * (A standalone value of -2 means that the XML declaration was found,
10486 * but no value was specified for the standalone attribute).
10487 */
10488
10489 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10490 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10491 int standalone = -2;
10492
10493 SKIP_BLANKS;
10494 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10495 SKIP(10);
10496 SKIP_BLANKS;
10497 if (RAW != '=') {
10498 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10499 return(standalone);
10500 }
10501 NEXT;
10502 SKIP_BLANKS;
10503 if (RAW == '\''){
10504 NEXT;
10505 if ((RAW == 'n') && (NXT(1) == 'o')) {
10506 standalone = 0;
10507 SKIP(2);
10508 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10509 (NXT(2) == 's')) {
10510 standalone = 1;
10511 SKIP(3);
10512 } else {
10513 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10514 }
10515 if (RAW != '\'') {
10516 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10517 } else
10518 NEXT;
10519 } else if (RAW == '"'){
10520 NEXT;
10521 if ((RAW == 'n') && (NXT(1) == 'o')) {
10522 standalone = 0;
10523 SKIP(2);
10524 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525 (NXT(2) == 's')) {
10526 standalone = 1;
10527 SKIP(3);
10528 } else {
10529 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530 }
10531 if (RAW != '"') {
10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533 } else
10534 NEXT;
10535 } else {
10536 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10537 }
10538 }
10539 return(standalone);
10540 }
10541
10542 /**
10543 * xmlParseXMLDecl:
10544 * @ctxt: an XML parser context
10545 *
10546 * parse an XML declaration header
10547 *
10548 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10549 */
10550
10551 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10552 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10553 xmlChar *version;
10554
10555 /*
10556 * This value for standalone indicates that the document has an
10557 * XML declaration but it does not have a standalone attribute.
10558 * It will be overwritten later if a standalone attribute is found.
10559 */
10560 ctxt->input->standalone = -2;
10561
10562 /*
10563 * We know that '<?xml' is here.
10564 */
10565 SKIP(5);
10566
10567 if (!IS_BLANK_CH(RAW)) {
10568 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10569 "Blank needed after '<?xml'\n");
10570 }
10571 SKIP_BLANKS;
10572
10573 /*
10574 * We must have the VersionInfo here.
10575 */
10576 version = xmlParseVersionInfo(ctxt);
10577 if (version == NULL) {
10578 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10579 } else {
10580 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10581 /*
10582 * Changed here for XML-1.0 5th edition
10583 */
10584 if (ctxt->options & XML_PARSE_OLD10) {
10585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586 "Unsupported version '%s'\n",
10587 version);
10588 } else {
10589 if ((version[0] == '1') && ((version[1] == '.'))) {
10590 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10591 "Unsupported version '%s'\n",
10592 version, NULL);
10593 } else {
10594 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10595 "Unsupported version '%s'\n",
10596 version);
10597 }
10598 }
10599 }
10600 if (ctxt->version != NULL)
10601 xmlFree((void *) ctxt->version);
10602 ctxt->version = version;
10603 }
10604
10605 /*
10606 * We may have the encoding declaration
10607 */
10608 if (!IS_BLANK_CH(RAW)) {
10609 if ((RAW == '?') && (NXT(1) == '>')) {
10610 SKIP(2);
10611 return;
10612 }
10613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10614 }
10615 xmlParseEncodingDecl(ctxt);
10616 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10617 (ctxt->instate == XML_PARSER_EOF)) {
10618 /*
10619 * The XML REC instructs us to stop parsing right here
10620 */
10621 return;
10622 }
10623
10624 /*
10625 * We may have the standalone status.
10626 */
10627 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10628 if ((RAW == '?') && (NXT(1) == '>')) {
10629 SKIP(2);
10630 return;
10631 }
10632 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10633 }
10634
10635 /*
10636 * We can grow the input buffer freely at that point
10637 */
10638 GROW;
10639
10640 SKIP_BLANKS;
10641 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10642
10643 SKIP_BLANKS;
10644 if ((RAW == '?') && (NXT(1) == '>')) {
10645 SKIP(2);
10646 } else if (RAW == '>') {
10647 /* Deprecated old WD ... */
10648 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10649 NEXT;
10650 } else {
10651 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652 MOVETO_ENDTAG(CUR_PTR);
10653 NEXT;
10654 }
10655 }
10656
10657 /**
10658 * xmlParseMisc:
10659 * @ctxt: an XML parser context
10660 *
10661 * parse an XML Misc* optional field.
10662 *
10663 * [27] Misc ::= Comment | PI | S
10664 */
10665
10666 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10667 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10668 while (ctxt->instate != XML_PARSER_EOF) {
10669 SKIP_BLANKS;
10670 GROW;
10671 if ((RAW == '<') && (NXT(1) == '?')) {
10672 xmlParsePI(ctxt);
10673 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10674 xmlParseComment(ctxt);
10675 } else {
10676 break;
10677 }
10678 }
10679 }
10680
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * Returns 0 if the document is well formed. Returns -1 if @ctxt or its
 * input is NULL, if the parser gets stopped, if the document is empty,
 * if the declared encoding is not supported or if the document is not
 * well formed. The parser context is augmented as a result of the
 * parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /* only guess the encoding when none has been set on the context */
    if ((ctxt->encoding == NULL) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
        return(-1);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * do not GROW here to avoid the detected encoder to decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
       GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
            (ctxt->instate == XML_PARSER_EOF)) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        /* copy the standalone status parsed from the declaration */
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* no XML declaration: record the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    /* copy the compression level of the input buffer to the document */
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
        ctxt->myDoc->compression = ctxt->input->buf->compressed;
    }

    /*
     * The Misc part of the Prolog
     */
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset is 1 while the declaration and internal subset are parsed */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
            if (ctxt->instate == XML_PARSER_EOF)
                return(-1);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        if (ctxt->instate == XML_PARSER_EOF)
            return(-1);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* anything left after the trailing Misc is an error */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     * Called whenever a handler is set; disableSAX is not consulted here.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* record the outcome of the parse in the document properties */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    /* a document that is not well formed is not valid either */
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}
10873
/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and loading of the external subset are switched off on the
 * context before the content is parsed.
 *
 * Returns 0 if the entity is well formed. Returns -1 if @ctxt or its
 * input is NULL, if the declared encoding is not supported, if the
 * parser gets stopped or if the entity is not well formed. The parser
 * context is augmented as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* an empty entity is reported, but parsing goes on */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* no declaration: record the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /*
     * Content parsing stops at end of input or on a '</': an end tag
     * here has no matching start tag inside the entity, any other
     * remaining input is extra content.
     */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
10979
10980 #ifdef LIBXML_PUSH_ENABLED
10981 /************************************************************************
10982 * *
10983 * Progressive parsing interfaces *
10984 * *
10985 ************************************************************************/
10986
10987 /**
10988 * xmlParseLookupSequence:
10989 * @ctxt: an XML parser context
10990 * @first: the first char to lookup
10991 * @next: the next char to lookup or zero
10992 * @third: the next char to lookup or zero
10993 *
10994 * Try to find if a sequence (first, next, third) or just (first next) or
10995 * (first) is available in the input stream.
10996 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997 * to avoid rescanning sequences of bytes, it DOES change the state of the
10998 * parser, do not use liberally.
10999 *
11000 * Returns the index to the current parsing point if the full sequence
11001 * is available, -1 otherwise.
11002 */
11003 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11004 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11005 xmlChar next, xmlChar third) {
11006 int base, len;
11007 xmlParserInputPtr in;
11008 const xmlChar *buf;
11009
11010 in = ctxt->input;
11011 if (in == NULL) return(-1);
11012 base = in->cur - in->base;
11013 if (base < 0) return(-1);
11014 if (ctxt->checkIndex > base)
11015 base = ctxt->checkIndex;
11016 if (in->buf == NULL) {
11017 buf = in->base;
11018 len = in->length;
11019 } else {
11020 buf = xmlBufContent(in->buf->buffer);
11021 len = xmlBufUse(in->buf->buffer);
11022 }
11023 /* take into account the sequence length */
11024 if (third) len -= 2;
11025 else if (next) len --;
11026 for (;base < len;base++) {
11027 if (buf[base] == first) {
11028 if (third != 0) {
11029 if ((buf[base + 1] != next) ||
11030 (buf[base + 2] != third)) continue;
11031 } else if (next != 0) {
11032 if (buf[base + 1] != next) continue;
11033 }
11034 ctxt->checkIndex = 0;
11035 #ifdef DEBUG_PUSH
11036 if (next == 0)
11037 xmlGenericError(xmlGenericErrorContext,
11038 "PP: lookup '%c' found at %d\n",
11039 first, base);
11040 else if (third == 0)
11041 xmlGenericError(xmlGenericErrorContext,
11042 "PP: lookup '%c%c' found at %d\n",
11043 first, next, base);
11044 else
11045 xmlGenericError(xmlGenericErrorContext,
11046 "PP: lookup '%c%c%c' found at %d\n",
11047 first, next, third, base);
11048 #endif
11049 return(base - (in->cur - in->base));
11050 }
11051 }
11052 ctxt->checkIndex = base;
11053 #ifdef DEBUG_PUSH
11054 if (next == 0)
11055 xmlGenericError(xmlGenericErrorContext,
11056 "PP: lookup '%c' failed\n", first);
11057 else if (third == 0)
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: lookup '%c%c' failed\n", first, next);
11060 else
11061 xmlGenericError(xmlGenericErrorContext,
11062 "PP: lookup '%c%c%c' failed\n", first, next, third);
11063 #endif
11064 return(-1);
11065 }
11066
11067 /**
11068 * xmlParseGetLasts:
11069 * @ctxt: an XML parser context
11070 * @lastlt: pointer to store the last '<' from the input
11071 * @lastgt: pointer to store the last '>' from the input
11072 *
11073 * Lookup the last < and > in the current chunk
11074 */
11075 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11076 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11077 const xmlChar **lastgt) {
11078 const xmlChar *tmp;
11079
11080 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11081 xmlGenericError(xmlGenericErrorContext,
11082 "Internal error: xmlParseGetLasts\n");
11083 return;
11084 }
11085 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11086 tmp = ctxt->input->end;
11087 tmp--;
11088 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11089 if (tmp < ctxt->input->base) {
11090 *lastlt = NULL;
11091 *lastgt = NULL;
11092 } else {
11093 *lastlt = tmp;
11094 tmp++;
11095 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11096 if (*tmp == '\'') {
11097 tmp++;
11098 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11099 if (tmp < ctxt->input->end) tmp++;
11100 } else if (*tmp == '"') {
11101 tmp++;
11102 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11103 if (tmp < ctxt->input->end) tmp++;
11104 } else
11105 tmp++;
11106 }
11107 if (tmp < ctxt->input->end)
11108 *lastgt = tmp;
11109 else {
11110 tmp = *lastlt;
11111 tmp--;
11112 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11113 if (tmp >= ctxt->input->base)
11114 *lastgt = tmp;
11115 else
11116 *lastgt = NULL;
11117 }
11118 }
11119 } else {
11120 *lastlt = NULL;
11121 *lastgt = NULL;
11122 }
11123 }
11124 /**
11125 * xmlCheckCdataPush:
11126 * @cur: pointer to the block of characters
11127 * @len: length of the block in bytes
11128 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11129 *
11130 * Check that the block of characters is okay as SCdata content [20]
11131 *
11132 * Returns the number of bytes to pass if okay, a negative index where an
11133 * UTF-8 error occurred otherwise
11134 */
11135 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11136 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11137 int ix;
11138 unsigned char c;
11139 int codepoint;
11140
11141 if ((utf == NULL) || (len <= 0))
11142 return(0);
11143
11144 for (ix = 0; ix < len;) { /* string is 0-terminated */
11145 c = utf[ix];
11146 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11147 if (c >= 0x20)
11148 ix++;
11149 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11150 ix++;
11151 else
11152 return(-ix);
11153 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11154 if (ix + 2 > len) return(complete ? -ix : ix);
11155 if ((utf[ix+1] & 0xc0 ) != 0x80)
11156 return(-ix);
11157 codepoint = (utf[ix] & 0x1f) << 6;
11158 codepoint |= utf[ix+1] & 0x3f;
11159 if (!xmlIsCharQ(codepoint))
11160 return(-ix);
11161 ix += 2;
11162 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11163 if (ix + 3 > len) return(complete ? -ix : ix);
11164 if (((utf[ix+1] & 0xc0) != 0x80) ||
11165 ((utf[ix+2] & 0xc0) != 0x80))
11166 return(-ix);
11167 codepoint = (utf[ix] & 0xf) << 12;
11168 codepoint |= (utf[ix+1] & 0x3f) << 6;
11169 codepoint |= utf[ix+2] & 0x3f;
11170 if (!xmlIsCharQ(codepoint))
11171 return(-ix);
11172 ix += 3;
11173 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11174 if (ix + 4 > len) return(complete ? -ix : ix);
11175 if (((utf[ix+1] & 0xc0) != 0x80) ||
11176 ((utf[ix+2] & 0xc0) != 0x80) ||
11177 ((utf[ix+3] & 0xc0) != 0x80))
11178 return(-ix);
11179 codepoint = (utf[ix] & 0x7) << 18;
11180 codepoint |= (utf[ix+1] & 0x3f) << 12;
11181 codepoint |= (utf[ix+2] & 0x3f) << 6;
11182 codepoint |= utf[ix+3] & 0x3f;
11183 if (!xmlIsCharQ(codepoint))
11184 return(-ix);
11185 ix += 4;
11186 } else /* unknown encoding */
11187 return(-ix);
11188 }
11189 return(ix);
11190 }
11191
11192 /**
11193 * xmlParseTryOrFinish:
11194 * @ctxt: an XML parser context
11195 * @terminate: last chunk indicator
11196 *
11197 * Try to progress on parsing
11198 *
11199 * Returns zero if no parsing was possible
11200 */
11201 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11202 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11203 int ret = 0;
11204 int avail, tlen;
11205 xmlChar cur, next;
11206 const xmlChar *lastlt, *lastgt;
11207
11208 if (ctxt->input == NULL)
11209 return(0);
11210
11211 #ifdef DEBUG_PUSH
11212 switch (ctxt->instate) {
11213 case XML_PARSER_EOF:
11214 xmlGenericError(xmlGenericErrorContext,
11215 "PP: try EOF\n"); break;
11216 case XML_PARSER_START:
11217 xmlGenericError(xmlGenericErrorContext,
11218 "PP: try START\n"); break;
11219 case XML_PARSER_MISC:
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: try MISC\n");break;
11222 case XML_PARSER_COMMENT:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try COMMENT\n");break;
11225 case XML_PARSER_PROLOG:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try PROLOG\n");break;
11228 case XML_PARSER_START_TAG:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try START_TAG\n");break;
11231 case XML_PARSER_CONTENT:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try CONTENT\n");break;
11234 case XML_PARSER_CDATA_SECTION:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try CDATA_SECTION\n");break;
11237 case XML_PARSER_END_TAG:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try END_TAG\n");break;
11240 case XML_PARSER_ENTITY_DECL:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try ENTITY_DECL\n");break;
11243 case XML_PARSER_ENTITY_VALUE:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try ENTITY_VALUE\n");break;
11246 case XML_PARSER_ATTRIBUTE_VALUE:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try ATTRIBUTE_VALUE\n");break;
11249 case XML_PARSER_DTD:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try DTD\n");break;
11252 case XML_PARSER_EPILOG:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try EPILOG\n");break;
11255 case XML_PARSER_PI:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try PI\n");break;
11258 case XML_PARSER_IGNORE:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try IGNORE\n");break;
11261 }
11262 #endif
11263
11264 if ((ctxt->input != NULL) &&
11265 (ctxt->input->cur - ctxt->input->base > 4096)) {
11266 xmlSHRINK(ctxt);
11267 ctxt->checkIndex = 0;
11268 }
11269 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11270
11271 while (ctxt->instate != XML_PARSER_EOF) {
11272 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11273 return(0);
11274
11275 if (ctxt->input == NULL) break;
11276 if (ctxt->input->buf == NULL)
11277 avail = ctxt->input->length -
11278 (ctxt->input->cur - ctxt->input->base);
11279 else {
11280 /*
11281 * If we are operating on converted input, try to flush
11282 * remaining chars to avoid them stalling in the non-converted
11283 * buffer. But do not do this in document start where
11284 * encoding="..." may not have been read and we work on a
11285 * guessed encoding.
11286 */
11287 if ((ctxt->instate != XML_PARSER_START) &&
11288 (ctxt->input->buf->raw != NULL) &&
11289 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11290 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11291 ctxt->input);
11292 size_t current = ctxt->input->cur - ctxt->input->base;
11293
11294 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11296 base, current);
11297 }
11298 avail = xmlBufUse(ctxt->input->buf->buffer) -
11299 (ctxt->input->cur - ctxt->input->base);
11300 }
11301 if (avail < 1)
11302 goto done;
11303 switch (ctxt->instate) {
11304 case XML_PARSER_EOF:
11305 /*
11306 * Document parsing is done !
11307 */
11308 goto done;
11309 case XML_PARSER_START:
11310 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11311 xmlChar start[4];
11312 xmlCharEncoding enc;
11313
11314 /*
11315 * Very first chars read from the document flow.
11316 */
11317 if (avail < 4)
11318 goto done;
11319
11320 /*
11321 * Get the 4 first bytes and decode the charset
11322 * if enc != XML_CHAR_ENCODING_NONE
11323 * plug some encoding conversion routines,
11324 * else xmlSwitchEncoding will set to (default)
11325 * UTF8.
11326 */
11327 start[0] = RAW;
11328 start[1] = NXT(1);
11329 start[2] = NXT(2);
11330 start[3] = NXT(3);
11331 enc = xmlDetectCharEncoding(start, 4);
11332 xmlSwitchEncoding(ctxt, enc);
11333 break;
11334 }
11335
11336 if (avail < 2)
11337 goto done;
11338 cur = ctxt->input->cur[0];
11339 next = ctxt->input->cur[1];
11340 if (cur == 0) {
11341 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11342 ctxt->sax->setDocumentLocator(ctxt->userData,
11343 &xmlDefaultSAXLocator);
11344 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11345 xmlHaltParser(ctxt);
11346 #ifdef DEBUG_PUSH
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: entering EOF\n");
11349 #endif
11350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351 ctxt->sax->endDocument(ctxt->userData);
11352 goto done;
11353 }
11354 if ((cur == '<') && (next == '?')) {
11355 /* PI or XML decl */
11356 if (avail < 5) return(ret);
11357 if ((!terminate) &&
11358 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11359 return(ret);
11360 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11361 ctxt->sax->setDocumentLocator(ctxt->userData,
11362 &xmlDefaultSAXLocator);
11363 if ((ctxt->input->cur[2] == 'x') &&
11364 (ctxt->input->cur[3] == 'm') &&
11365 (ctxt->input->cur[4] == 'l') &&
11366 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11367 ret += 5;
11368 #ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: Parsing XML Decl\n");
11371 #endif
11372 xmlParseXMLDecl(ctxt);
11373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11374 /*
11375 * The XML REC instructs us to stop parsing right
11376 * here
11377 */
11378 xmlHaltParser(ctxt);
11379 return(0);
11380 }
11381 ctxt->standalone = ctxt->input->standalone;
11382 if ((ctxt->encoding == NULL) &&
11383 (ctxt->input->encoding != NULL))
11384 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11385 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11386 (!ctxt->disableSAX))
11387 ctxt->sax->startDocument(ctxt->userData);
11388 ctxt->instate = XML_PARSER_MISC;
11389 #ifdef DEBUG_PUSH
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: entering MISC\n");
11392 #endif
11393 } else {
11394 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11395 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11396 (!ctxt->disableSAX))
11397 ctxt->sax->startDocument(ctxt->userData);
11398 ctxt->instate = XML_PARSER_MISC;
11399 #ifdef DEBUG_PUSH
11400 xmlGenericError(xmlGenericErrorContext,
11401 "PP: entering MISC\n");
11402 #endif
11403 }
11404 } else {
11405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11406 ctxt->sax->setDocumentLocator(ctxt->userData,
11407 &xmlDefaultSAXLocator);
11408 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11409 if (ctxt->version == NULL) {
11410 xmlErrMemory(ctxt, NULL);
11411 break;
11412 }
11413 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->startDocument(ctxt->userData);
11416 ctxt->instate = XML_PARSER_MISC;
11417 #ifdef DEBUG_PUSH
11418 xmlGenericError(xmlGenericErrorContext,
11419 "PP: entering MISC\n");
11420 #endif
11421 }
11422 break;
11423 case XML_PARSER_START_TAG: {
11424 const xmlChar *name;
11425 const xmlChar *prefix = NULL;
11426 const xmlChar *URI = NULL;
11427 int line = ctxt->input->line;
11428 int nsNr = ctxt->nsNr;
11429
11430 if ((avail < 2) && (ctxt->inputNr == 1))
11431 goto done;
11432 cur = ctxt->input->cur[0];
11433 if (cur != '<') {
11434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11435 xmlHaltParser(ctxt);
11436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437 ctxt->sax->endDocument(ctxt->userData);
11438 goto done;
11439 }
11440 if (!terminate) {
11441 if (ctxt->progressive) {
11442 /* > can be found unescaped in attribute values */
11443 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11444 goto done;
11445 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11446 goto done;
11447 }
11448 }
11449 if (ctxt->spaceNr == 0)
11450 spacePush(ctxt, -1);
11451 else if (*ctxt->space == -2)
11452 spacePush(ctxt, -1);
11453 else
11454 spacePush(ctxt, *ctxt->space);
11455 #ifdef LIBXML_SAX1_ENABLED
11456 if (ctxt->sax2)
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11459 #ifdef LIBXML_SAX1_ENABLED
11460 else
11461 name = xmlParseStartTag(ctxt);
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 if (ctxt->instate == XML_PARSER_EOF)
11464 goto done;
11465 if (name == NULL) {
11466 spacePop(ctxt);
11467 xmlHaltParser(ctxt);
11468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11469 ctxt->sax->endDocument(ctxt->userData);
11470 goto done;
11471 }
11472 #ifdef LIBXML_VALID_ENABLED
11473 /*
11474 * [ VC: Root Element Type ]
11475 * The Name in the document type declaration must match
11476 * the element type of the root element.
11477 */
11478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11479 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11481 #endif /* LIBXML_VALID_ENABLED */
11482
11483 /*
11484 * Check for an Empty Element.
11485 */
11486 if ((RAW == '/') && (NXT(1) == '>')) {
11487 SKIP(2);
11488
11489 if (ctxt->sax2) {
11490 if ((ctxt->sax != NULL) &&
11491 (ctxt->sax->endElementNs != NULL) &&
11492 (!ctxt->disableSAX))
11493 ctxt->sax->endElementNs(ctxt->userData, name,
11494 prefix, URI);
11495 if (ctxt->nsNr - nsNr > 0)
11496 nsPop(ctxt, ctxt->nsNr - nsNr);
11497 #ifdef LIBXML_SAX1_ENABLED
11498 } else {
11499 if ((ctxt->sax != NULL) &&
11500 (ctxt->sax->endElement != NULL) &&
11501 (!ctxt->disableSAX))
11502 ctxt->sax->endElement(ctxt->userData, name);
11503 #endif /* LIBXML_SAX1_ENABLED */
11504 }
11505 if (ctxt->instate == XML_PARSER_EOF)
11506 goto done;
11507 spacePop(ctxt);
11508 if (ctxt->nameNr == 0) {
11509 ctxt->instate = XML_PARSER_EPILOG;
11510 } else {
11511 ctxt->instate = XML_PARSER_CONTENT;
11512 }
11513 ctxt->progressive = 1;
11514 break;
11515 }
11516 if (RAW == '>') {
11517 NEXT;
11518 } else {
11519 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11520 "Couldn't find end of Start Tag %s\n",
11521 name);
11522 nodePop(ctxt);
11523 spacePop(ctxt);
11524 }
11525 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11526
11527 ctxt->instate = XML_PARSER_CONTENT;
11528 ctxt->progressive = 1;
11529 break;
11530 }
11531 case XML_PARSER_CONTENT: {
11532 int id;
11533 unsigned long cons;
11534 if ((avail < 2) && (ctxt->inputNr == 1))
11535 goto done;
11536 cur = ctxt->input->cur[0];
11537 next = ctxt->input->cur[1];
11538
11539 id = ctxt->input->id;
11540 cons = CUR_CONSUMED;
11541 if ((cur == '<') && (next == '/')) {
11542 ctxt->instate = XML_PARSER_END_TAG;
11543 break;
11544 } else if ((cur == '<') && (next == '?')) {
11545 if ((!terminate) &&
11546 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11547 ctxt->progressive = XML_PARSER_PI;
11548 goto done;
11549 }
11550 xmlParsePI(ctxt);
11551 ctxt->instate = XML_PARSER_CONTENT;
11552 ctxt->progressive = 1;
11553 } else if ((cur == '<') && (next != '!')) {
11554 ctxt->instate = XML_PARSER_START_TAG;
11555 break;
11556 } else if ((cur == '<') && (next == '!') &&
11557 (ctxt->input->cur[2] == '-') &&
11558 (ctxt->input->cur[3] == '-')) {
11559 int term;
11560
11561 if (avail < 4)
11562 goto done;
11563 ctxt->input->cur += 4;
11564 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11565 ctxt->input->cur -= 4;
11566 if ((!terminate) && (term < 0)) {
11567 ctxt->progressive = XML_PARSER_COMMENT;
11568 goto done;
11569 }
11570 xmlParseComment(ctxt);
11571 ctxt->instate = XML_PARSER_CONTENT;
11572 ctxt->progressive = 1;
11573 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11574 (ctxt->input->cur[2] == '[') &&
11575 (ctxt->input->cur[3] == 'C') &&
11576 (ctxt->input->cur[4] == 'D') &&
11577 (ctxt->input->cur[5] == 'A') &&
11578 (ctxt->input->cur[6] == 'T') &&
11579 (ctxt->input->cur[7] == 'A') &&
11580 (ctxt->input->cur[8] == '[')) {
11581 SKIP(9);
11582 ctxt->instate = XML_PARSER_CDATA_SECTION;
11583 break;
11584 } else if ((cur == '<') && (next == '!') &&
11585 (avail < 9)) {
11586 goto done;
11587 } else if (cur == '&') {
11588 if ((!terminate) &&
11589 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11590 goto done;
11591 xmlParseReference(ctxt);
11592 } else {
11593 /* TODO Avoid the extra copy, handle directly !!! */
11594 /*
11595 * Goal of the following test is:
11596 * - minimize calls to the SAX 'character' callback
11597 * when they are mergeable
11598 * - handle an problem for isBlank when we only parse
11599 * a sequence of blank chars and the next one is
11600 * not available to check against '<' presence.
11601 * - tries to homogenize the differences in SAX
11602 * callbacks between the push and pull versions
11603 * of the parser.
11604 */
11605 if ((ctxt->inputNr == 1) &&
11606 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11607 if (!terminate) {
11608 if (ctxt->progressive) {
11609 if ((lastlt == NULL) ||
11610 (ctxt->input->cur > lastlt))
11611 goto done;
11612 } else if (xmlParseLookupSequence(ctxt,
11613 '<', 0, 0) < 0) {
11614 goto done;
11615 }
11616 }
11617 }
11618 ctxt->checkIndex = 0;
11619 xmlParseCharData(ctxt, 0);
11620 }
11621 if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11623 "detected an error in element content\n");
11624 xmlHaltParser(ctxt);
11625 break;
11626 }
11627 break;
11628 }
11629 case XML_PARSER_END_TAG:
11630 if (avail < 2)
11631 goto done;
11632 if (!terminate) {
11633 if (ctxt->progressive) {
11634 /* > can be found unescaped in attribute values */
11635 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11636 goto done;
11637 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11638 goto done;
11639 }
11640 }
11641 if (ctxt->sax2) {
11642 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11643 nameNsPop(ctxt);
11644 }
11645 #ifdef LIBXML_SAX1_ENABLED
11646 else
11647 xmlParseEndTag1(ctxt, 0);
11648 #endif /* LIBXML_SAX1_ENABLED */
11649 if (ctxt->instate == XML_PARSER_EOF) {
11650 /* Nothing */
11651 } else if (ctxt->nameNr == 0) {
11652 ctxt->instate = XML_PARSER_EPILOG;
11653 } else {
11654 ctxt->instate = XML_PARSER_CONTENT;
11655 }
11656 break;
11657 case XML_PARSER_CDATA_SECTION: {
11658 /*
11659 * The Push mode need to have the SAX callback for
11660 * cdataBlock merge back contiguous callbacks.
11661 */
11662 int base;
11663
11664 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11665 if (base < 0) {
11666 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11667 int tmp;
11668
11669 tmp = xmlCheckCdataPush(ctxt->input->cur,
11670 XML_PARSER_BIG_BUFFER_SIZE, 0);
11671 if (tmp < 0) {
11672 tmp = -tmp;
11673 ctxt->input->cur += tmp;
11674 goto encoding_error;
11675 }
11676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11677 if (ctxt->sax->cdataBlock != NULL)
11678 ctxt->sax->cdataBlock(ctxt->userData,
11679 ctxt->input->cur, tmp);
11680 else if (ctxt->sax->characters != NULL)
11681 ctxt->sax->characters(ctxt->userData,
11682 ctxt->input->cur, tmp);
11683 }
11684 if (ctxt->instate == XML_PARSER_EOF)
11685 goto done;
11686 SKIPL(tmp);
11687 ctxt->checkIndex = 0;
11688 }
11689 goto done;
11690 } else {
11691 int tmp;
11692
11693 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11694 if ((tmp < 0) || (tmp != base)) {
11695 tmp = -tmp;
11696 ctxt->input->cur += tmp;
11697 goto encoding_error;
11698 }
11699 if ((ctxt->sax != NULL) && (base == 0) &&
11700 (ctxt->sax->cdataBlock != NULL) &&
11701 (!ctxt->disableSAX)) {
11702 /*
11703 * Special case to provide identical behaviour
11704 * between pull and push parsers on enpty CDATA
11705 * sections
11706 */
11707 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11708 (!strncmp((const char *)&ctxt->input->cur[-9],
11709 "<![CDATA[", 9)))
11710 ctxt->sax->cdataBlock(ctxt->userData,
11711 BAD_CAST "", 0);
11712 } else if ((ctxt->sax != NULL) && (base > 0) &&
11713 (!ctxt->disableSAX)) {
11714 if (ctxt->sax->cdataBlock != NULL)
11715 ctxt->sax->cdataBlock(ctxt->userData,
11716 ctxt->input->cur, base);
11717 else if (ctxt->sax->characters != NULL)
11718 ctxt->sax->characters(ctxt->userData,
11719 ctxt->input->cur, base);
11720 }
11721 if (ctxt->instate == XML_PARSER_EOF)
11722 goto done;
11723 SKIPL(base + 3);
11724 ctxt->checkIndex = 0;
11725 ctxt->instate = XML_PARSER_CONTENT;
11726 #ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering CONTENT\n");
11729 #endif
11730 }
11731 break;
11732 }
11733 case XML_PARSER_MISC:
11734 SKIP_BLANKS;
11735 if (ctxt->input->buf == NULL)
11736 avail = ctxt->input->length -
11737 (ctxt->input->cur - ctxt->input->base);
11738 else
11739 avail = xmlBufUse(ctxt->input->buf->buffer) -
11740 (ctxt->input->cur - ctxt->input->base);
11741 if (avail < 2)
11742 goto done;
11743 cur = ctxt->input->cur[0];
11744 next = ctxt->input->cur[1];
11745 if ((cur == '<') && (next == '?')) {
11746 if ((!terminate) &&
11747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748 ctxt->progressive = XML_PARSER_PI;
11749 goto done;
11750 }
11751 #ifdef DEBUG_PUSH
11752 xmlGenericError(xmlGenericErrorContext,
11753 "PP: Parsing PI\n");
11754 #endif
11755 xmlParsePI(ctxt);
11756 if (ctxt->instate == XML_PARSER_EOF)
11757 goto done;
11758 ctxt->instate = XML_PARSER_MISC;
11759 ctxt->progressive = 1;
11760 ctxt->checkIndex = 0;
11761 } else if ((cur == '<') && (next == '!') &&
11762 (ctxt->input->cur[2] == '-') &&
11763 (ctxt->input->cur[3] == '-')) {
11764 if ((!terminate) &&
11765 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11766 ctxt->progressive = XML_PARSER_COMMENT;
11767 goto done;
11768 }
11769 #ifdef DEBUG_PUSH
11770 xmlGenericError(xmlGenericErrorContext,
11771 "PP: Parsing Comment\n");
11772 #endif
11773 xmlParseComment(ctxt);
11774 if (ctxt->instate == XML_PARSER_EOF)
11775 goto done;
11776 ctxt->instate = XML_PARSER_MISC;
11777 ctxt->progressive = 1;
11778 ctxt->checkIndex = 0;
11779 } else if ((cur == '<') && (next == '!') &&
11780 (ctxt->input->cur[2] == 'D') &&
11781 (ctxt->input->cur[3] == 'O') &&
11782 (ctxt->input->cur[4] == 'C') &&
11783 (ctxt->input->cur[5] == 'T') &&
11784 (ctxt->input->cur[6] == 'Y') &&
11785 (ctxt->input->cur[7] == 'P') &&
11786 (ctxt->input->cur[8] == 'E')) {
11787 if ((!terminate) &&
11788 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11789 ctxt->progressive = XML_PARSER_DTD;
11790 goto done;
11791 }
11792 #ifdef DEBUG_PUSH
11793 xmlGenericError(xmlGenericErrorContext,
11794 "PP: Parsing internal subset\n");
11795 #endif
11796 ctxt->inSubset = 1;
11797 ctxt->progressive = 0;
11798 ctxt->checkIndex = 0;
11799 xmlParseDocTypeDecl(ctxt);
11800 if (ctxt->instate == XML_PARSER_EOF)
11801 goto done;
11802 if (RAW == '[') {
11803 ctxt->instate = XML_PARSER_DTD;
11804 #ifdef DEBUG_PUSH
11805 xmlGenericError(xmlGenericErrorContext,
11806 "PP: entering DTD\n");
11807 #endif
11808 } else {
11809 /*
11810 * Create and update the external subset.
11811 */
11812 ctxt->inSubset = 2;
11813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11814 (ctxt->sax->externalSubset != NULL))
11815 ctxt->sax->externalSubset(ctxt->userData,
11816 ctxt->intSubName, ctxt->extSubSystem,
11817 ctxt->extSubURI);
11818 ctxt->inSubset = 0;
11819 xmlCleanSpecialAttr(ctxt);
11820 ctxt->instate = XML_PARSER_PROLOG;
11821 #ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: entering PROLOG\n");
11824 #endif
11825 }
11826 } else if ((cur == '<') && (next == '!') &&
11827 (avail < 9)) {
11828 goto done;
11829 } else {
11830 ctxt->instate = XML_PARSER_START_TAG;
11831 ctxt->progressive = XML_PARSER_START_TAG;
11832 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11833 #ifdef DEBUG_PUSH
11834 xmlGenericError(xmlGenericErrorContext,
11835 "PP: entering START_TAG\n");
11836 #endif
11837 }
11838 break;
11839 case XML_PARSER_PROLOG:
11840 SKIP_BLANKS;
11841 if (ctxt->input->buf == NULL)
11842 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11843 else
11844 avail = xmlBufUse(ctxt->input->buf->buffer) -
11845 (ctxt->input->cur - ctxt->input->base);
11846 if (avail < 2)
11847 goto done;
11848 cur = ctxt->input->cur[0];
11849 next = ctxt->input->cur[1];
11850 if ((cur == '<') && (next == '?')) {
11851 if ((!terminate) &&
11852 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853 ctxt->progressive = XML_PARSER_PI;
11854 goto done;
11855 }
11856 #ifdef DEBUG_PUSH
11857 xmlGenericError(xmlGenericErrorContext,
11858 "PP: Parsing PI\n");
11859 #endif
11860 xmlParsePI(ctxt);
11861 if (ctxt->instate == XML_PARSER_EOF)
11862 goto done;
11863 ctxt->instate = XML_PARSER_PROLOG;
11864 ctxt->progressive = 1;
11865 } else if ((cur == '<') && (next == '!') &&
11866 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867 if ((!terminate) &&
11868 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869 ctxt->progressive = XML_PARSER_COMMENT;
11870 goto done;
11871 }
11872 #ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: Parsing Comment\n");
11875 #endif
11876 xmlParseComment(ctxt);
11877 if (ctxt->instate == XML_PARSER_EOF)
11878 goto done;
11879 ctxt->instate = XML_PARSER_PROLOG;
11880 ctxt->progressive = 1;
11881 } else if ((cur == '<') && (next == '!') &&
11882 (avail < 4)) {
11883 goto done;
11884 } else {
11885 ctxt->instate = XML_PARSER_START_TAG;
11886 if (ctxt->progressive == 0)
11887 ctxt->progressive = XML_PARSER_START_TAG;
11888 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11889 #ifdef DEBUG_PUSH
11890 xmlGenericError(xmlGenericErrorContext,
11891 "PP: entering START_TAG\n");
11892 #endif
11893 }
11894 break;
11895 case XML_PARSER_EPILOG:
11896 SKIP_BLANKS;
11897 if (ctxt->input->buf == NULL)
11898 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11899 else
11900 avail = xmlBufUse(ctxt->input->buf->buffer) -
11901 (ctxt->input->cur - ctxt->input->base);
11902 if (avail < 2)
11903 goto done;
11904 cur = ctxt->input->cur[0];
11905 next = ctxt->input->cur[1];
11906 if ((cur == '<') && (next == '?')) {
11907 if ((!terminate) &&
11908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909 ctxt->progressive = XML_PARSER_PI;
11910 goto done;
11911 }
11912 #ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: Parsing PI\n");
11915 #endif
11916 xmlParsePI(ctxt);
11917 if (ctxt->instate == XML_PARSER_EOF)
11918 goto done;
11919 ctxt->instate = XML_PARSER_EPILOG;
11920 ctxt->progressive = 1;
11921 } else if ((cur == '<') && (next == '!') &&
11922 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11923 if ((!terminate) &&
11924 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11925 ctxt->progressive = XML_PARSER_COMMENT;
11926 goto done;
11927 }
11928 #ifdef DEBUG_PUSH
11929 xmlGenericError(xmlGenericErrorContext,
11930 "PP: Parsing Comment\n");
11931 #endif
11932 xmlParseComment(ctxt);
11933 if (ctxt->instate == XML_PARSER_EOF)
11934 goto done;
11935 ctxt->instate = XML_PARSER_EPILOG;
11936 ctxt->progressive = 1;
11937 } else if ((cur == '<') && (next == '!') &&
11938 (avail < 4)) {
11939 goto done;
11940 } else {
11941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11942 xmlHaltParser(ctxt);
11943 #ifdef DEBUG_PUSH
11944 xmlGenericError(xmlGenericErrorContext,
11945 "PP: entering EOF\n");
11946 #endif
11947 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11948 ctxt->sax->endDocument(ctxt->userData);
11949 goto done;
11950 }
11951 break;
11952 case XML_PARSER_DTD: {
11953 /*
11954 * Sorry but progressive parsing of the internal subset
11955 * is not expected to be supported. We first check that
11956 * the full content of the internal subset is available and
11957 * the parsing is launched only at that point.
11958 * Internal subset ends up with "']' S? '>'" in an unescaped
11959 * section and not in a ']]>' sequence which are conditional
11960 * sections (whoever argued to keep that crap in XML deserve
11961 * a place in hell !).
11962 */
11963 int base, i;
11964 xmlChar *buf;
11965 xmlChar quote = 0;
11966 size_t use;
11967
11968 base = ctxt->input->cur - ctxt->input->base;
11969 if (base < 0) return(0);
11970 if (ctxt->checkIndex > base)
11971 base = ctxt->checkIndex;
11972 buf = xmlBufContent(ctxt->input->buf->buffer);
11973 use = xmlBufUse(ctxt->input->buf->buffer);
11974 for (;(unsigned int) base < use; base++) {
11975 if (quote != 0) {
11976 if (buf[base] == quote)
11977 quote = 0;
11978 continue;
11979 }
11980 if ((quote == 0) && (buf[base] == '<')) {
11981 int found = 0;
11982 /* special handling of comments */
11983 if (((unsigned int) base + 4 < use) &&
11984 (buf[base + 1] == '!') &&
11985 (buf[base + 2] == '-') &&
11986 (buf[base + 3] == '-')) {
11987 for (;(unsigned int) base + 3 < use; base++) {
11988 if ((buf[base] == '-') &&
11989 (buf[base + 1] == '-') &&
11990 (buf[base + 2] == '>')) {
11991 found = 1;
11992 base += 2;
11993 break;
11994 }
11995 }
11996 if (!found) {
11997 #if 0
11998 fprintf(stderr, "unfinished comment\n");
11999 #endif
12000 break; /* for */
12001 }
12002 continue;
12003 }
12004 }
12005 if (buf[base] == '"') {
12006 quote = '"';
12007 continue;
12008 }
12009 if (buf[base] == '\'') {
12010 quote = '\'';
12011 continue;
12012 }
12013 if (buf[base] == ']') {
12014 #if 0
12015 fprintf(stderr, "%c%c%c%c: ", buf[base],
12016 buf[base + 1], buf[base + 2], buf[base + 3]);
12017 #endif
12018 if ((unsigned int) base +1 >= use)
12019 break;
12020 if (buf[base + 1] == ']') {
12021 /* conditional crap, skip both ']' ! */
12022 base++;
12023 continue;
12024 }
12025 for (i = 1; (unsigned int) base + i < use; i++) {
12026 if (buf[base + i] == '>') {
12027 #if 0
12028 fprintf(stderr, "found\n");
12029 #endif
12030 goto found_end_int_subset;
12031 }
12032 if (!IS_BLANK_CH(buf[base + i])) {
12033 #if 0
12034 fprintf(stderr, "not found\n");
12035 #endif
12036 goto not_end_of_int_subset;
12037 }
12038 }
12039 #if 0
12040 fprintf(stderr, "end of stream\n");
12041 #endif
12042 break;
12043
12044 }
12045 not_end_of_int_subset:
12046 continue; /* for */
12047 }
12048 /*
12049 * We didn't found the end of the Internal subset
12050 */
12051 if (quote == 0)
12052 ctxt->checkIndex = base;
12053 else
12054 ctxt->checkIndex = 0;
12055 #ifdef DEBUG_PUSH
12056 if (next == 0)
12057 xmlGenericError(xmlGenericErrorContext,
12058 "PP: lookup of int subset end filed\n");
12059 #endif
12060 goto done;
12061
12062 found_end_int_subset:
12063 ctxt->checkIndex = 0;
12064 xmlParseInternalSubset(ctxt);
12065 if (ctxt->instate == XML_PARSER_EOF)
12066 goto done;
12067 ctxt->inSubset = 2;
12068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12069 (ctxt->sax->externalSubset != NULL))
12070 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12071 ctxt->extSubSystem, ctxt->extSubURI);
12072 ctxt->inSubset = 0;
12073 xmlCleanSpecialAttr(ctxt);
12074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
12076 ctxt->instate = XML_PARSER_PROLOG;
12077 ctxt->checkIndex = 0;
12078 #ifdef DEBUG_PUSH
12079 xmlGenericError(xmlGenericErrorContext,
12080 "PP: entering PROLOG\n");
12081 #endif
12082 break;
12083 }
12084 case XML_PARSER_COMMENT:
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: internal error, state == COMMENT\n");
12087 ctxt->instate = XML_PARSER_CONTENT;
12088 #ifdef DEBUG_PUSH
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: entering CONTENT\n");
12091 #endif
12092 break;
12093 case XML_PARSER_IGNORE:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == IGNORE");
12096 ctxt->instate = XML_PARSER_DTD;
12097 #ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering DTD\n");
12100 #endif
12101 break;
12102 case XML_PARSER_PI:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == PI\n");
12105 ctxt->instate = XML_PARSER_CONTENT;
12106 #ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering CONTENT\n");
12109 #endif
12110 break;
12111 case XML_PARSER_ENTITY_DECL:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == ENTITY_DECL\n");
12114 ctxt->instate = XML_PARSER_DTD;
12115 #ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering DTD\n");
12118 #endif
12119 break;
12120 case XML_PARSER_ENTITY_VALUE:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ENTITY_VALUE\n");
12123 ctxt->instate = XML_PARSER_CONTENT;
12124 #ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering DTD\n");
12127 #endif
12128 break;
12129 case XML_PARSER_ATTRIBUTE_VALUE:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12132 ctxt->instate = XML_PARSER_START_TAG;
12133 #ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering START_TAG\n");
12136 #endif
12137 break;
12138 case XML_PARSER_SYSTEM_LITERAL:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == SYSTEM_LITERAL\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145 #endif
12146 break;
12147 case XML_PARSER_PUBLIC_LITERAL:
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: internal error, state == PUBLIC_LITERAL\n");
12150 ctxt->instate = XML_PARSER_START_TAG;
12151 #ifdef DEBUG_PUSH
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: entering START_TAG\n");
12154 #endif
12155 break;
12156 }
12157 }
12158 done:
12159 #ifdef DEBUG_PUSH
12160 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12161 #endif
12162 return(ret);
12163 encoding_error:
12164 {
12165 char buffer[150];
12166
12167 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168 ctxt->input->cur[0], ctxt->input->cur[1],
12169 ctxt->input->cur[2], ctxt->input->cur[3]);
12170 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12171 "Input is not proper UTF-8, indicate encoding !\n%s",
12172 BAD_CAST buffer, NULL);
12173 }
12174 return(0);
12175 }
12176
12177 /**
12178 * xmlParseCheckTransition:
12179 * @ctxt: an XML parser context
12180 * @chunk: a char array
12181 * @size: the size in byte of the chunk
12182 *
12183 * Check depending on the current parser state if the chunk given must be
12184 * processed immediately or one need more data to advance on parsing.
12185 *
12186 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12187 */
12188 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12189 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12190 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12191 return(-1);
12192 if (ctxt->instate == XML_PARSER_START_TAG) {
12193 if (memchr(chunk, '>', size) != NULL)
12194 return(1);
12195 return(0);
12196 }
12197 if (ctxt->progressive == XML_PARSER_COMMENT) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12201 }
12202 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12206 }
12207 if (ctxt->progressive == XML_PARSER_PI) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12211 }
12212 if (ctxt->instate == XML_PARSER_END_TAG) {
12213 if (memchr(chunk, '>', size) != NULL)
12214 return(1);
12215 return(0);
12216 }
12217 if ((ctxt->progressive == XML_PARSER_DTD) ||
12218 (ctxt->instate == XML_PARSER_DTD)) {
12219 if (memchr(chunk, '>', size) != NULL)
12220 return(1);
12221 return(0);
12222 }
12223 return(1);
12224 }
12225
12226 /**
12227 * xmlParseChunk:
12228 * @ctxt: an XML parser context
12229 * @chunk: an char array
12230 * @size: the size in byte of the chunk
12231 * @terminate: last chunk indicator
12232 *
12233 * Parse a Chunk of memory
12234 *
12235 * Returns zero if no error, the xmlParserErrors otherwise.
12236 */
12237 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12238 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12239 int terminate) {
12240 int end_in_lf = 0;
12241 int remain = 0;
12242 size_t old_avail = 0;
12243 size_t avail = 0;
12244
12245 if (ctxt == NULL)
12246 return(XML_ERR_INTERNAL_ERROR);
12247 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12248 return(ctxt->errNo);
12249 if (ctxt->instate == XML_PARSER_EOF)
12250 return(-1);
12251 if (ctxt->instate == XML_PARSER_START)
12252 xmlDetectSAX2(ctxt);
12253 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12254 (chunk[size - 1] == '\r')) {
12255 end_in_lf = 1;
12256 size--;
12257 }
12258
12259 xmldecl_done:
12260
12261 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12262 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12263 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12264 size_t cur = ctxt->input->cur - ctxt->input->base;
12265 int res;
12266
12267 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12268 /*
12269 * Specific handling if we autodetected an encoding, we should not
12270 * push more than the first line ... which depend on the encoding
12271 * And only push the rest once the final encoding was detected
12272 */
12273 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12274 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12275 unsigned int len = 45;
12276
12277 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UTF-16")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UTF16")))
12281 len = 90;
12282 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283 BAD_CAST "UCS-4")) ||
12284 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285 BAD_CAST "UCS4")))
12286 len = 180;
12287
12288 if (ctxt->input->buf->rawconsumed < len)
12289 len -= ctxt->input->buf->rawconsumed;
12290
12291 /*
12292 * Change size for reading the initial declaration only
12293 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294 * will blindly copy extra bytes from memory.
12295 */
12296 if ((unsigned int) size > len) {
12297 remain = size - len;
12298 size = len;
12299 } else {
12300 remain = 0;
12301 }
12302 }
12303 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12305 if (res < 0) {
12306 ctxt->errNo = XML_PARSER_EOF;
12307 xmlHaltParser(ctxt);
12308 return (XML_PARSER_EOF);
12309 }
12310 #ifdef DEBUG_PUSH
12311 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12312 #endif
12313
12314 } else if (ctxt->instate != XML_PARSER_EOF) {
12315 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12316 xmlParserInputBufferPtr in = ctxt->input->buf;
12317 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12318 (in->raw != NULL)) {
12319 int nbchars;
12320 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12321 size_t current = ctxt->input->cur - ctxt->input->base;
12322
12323 nbchars = xmlCharEncInput(in, terminate);
12324 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12325 if (nbchars < 0) {
12326 /* TODO 2.6.0 */
12327 xmlGenericError(xmlGenericErrorContext,
12328 "xmlParseChunk: encoder error\n");
12329 xmlHaltParser(ctxt);
12330 return(XML_ERR_INVALID_ENCODING);
12331 }
12332 }
12333 }
12334 }
12335 if (remain != 0) {
12336 xmlParseTryOrFinish(ctxt, 0);
12337 } else {
12338 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12339 avail = xmlBufUse(ctxt->input->buf->buffer);
12340 /*
12341 * Depending on the current state it may not be such
12342 * a good idea to try parsing if there is nothing in the chunk
12343 * which would be worth doing a parser state transition and we
12344 * need to wait for more data
12345 */
12346 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12347 (old_avail == 0) || (avail == 0) ||
12348 (xmlParseCheckTransition(ctxt,
12349 (const char *)&ctxt->input->base[old_avail],
12350 avail - old_avail)))
12351 xmlParseTryOrFinish(ctxt, terminate);
12352 }
12353 if (ctxt->instate == XML_PARSER_EOF)
12354 return(ctxt->errNo);
12355
12356 if ((ctxt->input != NULL) &&
12357 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12358 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12359 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12360 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12361 xmlHaltParser(ctxt);
12362 }
12363 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12364 return(ctxt->errNo);
12365
12366 if (remain != 0) {
12367 chunk += size;
12368 size = remain;
12369 remain = 0;
12370 goto xmldecl_done;
12371 }
12372 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12373 (ctxt->input->buf != NULL)) {
12374 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12375 ctxt->input);
12376 size_t current = ctxt->input->cur - ctxt->input->base;
12377
12378 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12379
12380 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12381 base, current);
12382 }
12383 if (terminate) {
12384 /*
12385 * Check for termination
12386 */
12387 int cur_avail = 0;
12388
12389 if (ctxt->input != NULL) {
12390 if (ctxt->input->buf == NULL)
12391 cur_avail = ctxt->input->length -
12392 (ctxt->input->cur - ctxt->input->base);
12393 else
12394 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12395 (ctxt->input->cur - ctxt->input->base);
12396 }
12397
12398 if ((ctxt->instate != XML_PARSER_EOF) &&
12399 (ctxt->instate != XML_PARSER_EPILOG)) {
12400 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12401 }
12402 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12403 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12404 }
12405 if (ctxt->instate != XML_PARSER_EOF) {
12406 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12407 ctxt->sax->endDocument(ctxt->userData);
12408 }
12409 ctxt->instate = XML_PARSER_EOF;
12410 }
12411 if (ctxt->wellFormed == 0)
12412 return((xmlParserErrors) ctxt->errNo);
12413 else
12414 return(0);
12415 }
12416
12417 /************************************************************************
12418 * *
12419 * I/O front end functions to the parser *
12420 * *
12421 ************************************************************************/
12422
12423 /**
12424 * xmlCreatePushParserCtxt:
12425 * @sax: a SAX handler
12426 * @user_data: The user data returned on SAX callbacks
12427 * @chunk: a pointer to an array of chars
12428 * @size: number of chars in the array
12429 * @filename: an optional file name or URI
12430 *
12431 * Create a parser context for using the XML parser in push mode.
12432 * If @buffer and @size are non-NULL, the data is used to detect
12433 * the encoding. The remaining characters will be parsed so they
12434 * don't need to be fed in again through xmlParseChunk.
12435 * To allow content encoding detection, @size should be >= 4
12436 * The value of @filename is used for fetching external entities
12437 * and error/warning reports.
12438 *
12439 * Returns the new parser context or NULL
12440 */
12441
12442 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12443 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12444 const char *chunk, int size, const char *filename) {
12445 xmlParserCtxtPtr ctxt;
12446 xmlParserInputPtr inputStream;
12447 xmlParserInputBufferPtr buf;
12448 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12449
12450 /*
12451 * plug some encoding conversion routines
12452 */
12453 if ((chunk != NULL) && (size >= 4))
12454 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12455
12456 buf = xmlAllocParserInputBuffer(enc);
12457 if (buf == NULL) return(NULL);
12458
12459 ctxt = xmlNewParserCtxt();
12460 if (ctxt == NULL) {
12461 xmlErrMemory(NULL, "creating parser: out of memory\n");
12462 xmlFreeParserInputBuffer(buf);
12463 return(NULL);
12464 }
12465 ctxt->dictNames = 1;
12466 if (sax != NULL) {
12467 #ifdef LIBXML_SAX1_ENABLED
12468 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12469 #endif /* LIBXML_SAX1_ENABLED */
12470 xmlFree(ctxt->sax);
12471 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12472 if (ctxt->sax == NULL) {
12473 xmlErrMemory(ctxt, NULL);
12474 xmlFreeParserInputBuffer(buf);
12475 xmlFreeParserCtxt(ctxt);
12476 return(NULL);
12477 }
12478 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12479 if (sax->initialized == XML_SAX2_MAGIC)
12480 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12481 else
12482 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12483 if (user_data != NULL)
12484 ctxt->userData = user_data;
12485 }
12486 if (filename == NULL) {
12487 ctxt->directory = NULL;
12488 } else {
12489 ctxt->directory = xmlParserGetDirectory(filename);
12490 }
12491
12492 inputStream = xmlNewInputStream(ctxt);
12493 if (inputStream == NULL) {
12494 xmlFreeParserCtxt(ctxt);
12495 xmlFreeParserInputBuffer(buf);
12496 return(NULL);
12497 }
12498
12499 if (filename == NULL)
12500 inputStream->filename = NULL;
12501 else {
12502 inputStream->filename = (char *)
12503 xmlCanonicPath((const xmlChar *) filename);
12504 if (inputStream->filename == NULL) {
12505 xmlFreeParserCtxt(ctxt);
12506 xmlFreeParserInputBuffer(buf);
12507 return(NULL);
12508 }
12509 }
12510 inputStream->buf = buf;
12511 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12512 inputPush(ctxt, inputStream);
12513
12514 /*
12515 * If the caller didn't provide an initial 'chunk' for determining
12516 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517 * that it can be automatically determined later
12518 */
12519 if ((size == 0) || (chunk == NULL)) {
12520 ctxt->charset = XML_CHAR_ENCODING_NONE;
12521 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12522 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12523 size_t cur = ctxt->input->cur - ctxt->input->base;
12524
12525 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12526
12527 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12528 #ifdef DEBUG_PUSH
12529 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12530 #endif
12531 }
12532
12533 if (enc != XML_CHAR_ENCODING_NONE) {
12534 xmlSwitchEncoding(ctxt, enc);
12535 }
12536
12537 return(ctxt);
12538 }
12539 #endif /* LIBXML_PUSH_ENABLED */
12540
12541 /**
12542 * xmlHaltParser:
12543 * @ctxt: an XML parser context
12544 *
12545 * Blocks further parser processing don't override error
12546 * for internal use
12547 */
12548 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12549 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12550 if (ctxt == NULL)
12551 return;
12552 ctxt->instate = XML_PARSER_EOF;
12553 ctxt->disableSAX = 1;
12554 while (ctxt->inputNr > 1)
12555 xmlFreeInputStream(inputPop(ctxt));
12556 if (ctxt->input != NULL) {
12557 /*
12558 * in case there was a specific allocation deallocate before
12559 * overriding base
12560 */
12561 if (ctxt->input->free != NULL) {
12562 ctxt->input->free((xmlChar *) ctxt->input->base);
12563 ctxt->input->free = NULL;
12564 }
12565 if (ctxt->input->buf != NULL) {
12566 xmlFreeParserInputBuffer(ctxt->input->buf);
12567 ctxt->input->buf = NULL;
12568 }
12569 ctxt->input->cur = BAD_CAST"";
12570 ctxt->input->length = 0;
12571 ctxt->input->base = ctxt->input->cur;
12572 ctxt->input->end = ctxt->input->cur;
12573 }
12574 }
12575
12576 /**
12577 * xmlStopParser:
12578 * @ctxt: an XML parser context
12579 *
12580 * Blocks further parser processing
12581 */
12582 void
xmlStopParser(xmlParserCtxtPtr ctxt)12583 xmlStopParser(xmlParserCtxtPtr ctxt) {
12584 if (ctxt == NULL)
12585 return;
12586 xmlHaltParser(ctxt);
12587 ctxt->errNo = XML_ERR_USER_STOP;
12588 }
12589
12590 /**
12591 * xmlCreateIOParserCtxt:
12592 * @sax: a SAX handler
12593 * @user_data: The user data returned on SAX callbacks
12594 * @ioread: an I/O read function
12595 * @ioclose: an I/O close function
12596 * @ioctx: an I/O handler
12597 * @enc: the charset encoding if known
12598 *
12599 * Create a parser context for using the XML parser with an existing
12600 * I/O stream
12601 *
12602 * Returns the new parser context or NULL
12603 */
12604 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12605 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12606 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12607 void *ioctx, xmlCharEncoding enc) {
12608 xmlParserCtxtPtr ctxt;
12609 xmlParserInputPtr inputStream;
12610 xmlParserInputBufferPtr buf;
12611
12612 if (ioread == NULL) return(NULL);
12613
12614 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12615 if (buf == NULL) {
12616 if (ioclose != NULL)
12617 ioclose(ioctx);
12618 return (NULL);
12619 }
12620
12621 ctxt = xmlNewParserCtxt();
12622 if (ctxt == NULL) {
12623 xmlFreeParserInputBuffer(buf);
12624 return(NULL);
12625 }
12626 if (sax != NULL) {
12627 #ifdef LIBXML_SAX1_ENABLED
12628 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12629 #endif /* LIBXML_SAX1_ENABLED */
12630 xmlFree(ctxt->sax);
12631 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12632 if (ctxt->sax == NULL) {
12633 xmlFreeParserInputBuffer(buf);
12634 xmlErrMemory(ctxt, NULL);
12635 xmlFreeParserCtxt(ctxt);
12636 return(NULL);
12637 }
12638 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12639 if (sax->initialized == XML_SAX2_MAGIC)
12640 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12641 else
12642 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12643 if (user_data != NULL)
12644 ctxt->userData = user_data;
12645 }
12646
12647 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12648 if (inputStream == NULL) {
12649 xmlFreeParserCtxt(ctxt);
12650 return(NULL);
12651 }
12652 inputPush(ctxt, inputStream);
12653
12654 return(ctxt);
12655 }
12656
12657 #ifdef LIBXML_VALID_ENABLED
12658 /************************************************************************
12659 * *
12660 * Front ends when parsing a DTD *
12661 * *
12662 ************************************************************************/
12663
12664 /**
12665 * xmlIOParseDTD:
12666 * @sax: the SAX handler block or NULL
12667 * @input: an Input Buffer
12668 * @enc: the charset encoding if known
12669 *
12670 * Load and parse a DTD
12671 *
12672 * Returns the resulting xmlDtdPtr or NULL in case of error.
12673 * @input will be freed by the function in any case.
12674 */
12675
12676 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12677 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12678 xmlCharEncoding enc) {
12679 xmlDtdPtr ret = NULL;
12680 xmlParserCtxtPtr ctxt;
12681 xmlParserInputPtr pinput = NULL;
12682 xmlChar start[4];
12683
12684 if (input == NULL)
12685 return(NULL);
12686
12687 ctxt = xmlNewParserCtxt();
12688 if (ctxt == NULL) {
12689 xmlFreeParserInputBuffer(input);
12690 return(NULL);
12691 }
12692
12693 /* We are loading a DTD */
12694 ctxt->options |= XML_PARSE_DTDLOAD;
12695
12696 /*
12697 * Set-up the SAX context
12698 */
12699 if (sax != NULL) {
12700 if (ctxt->sax != NULL)
12701 xmlFree(ctxt->sax);
12702 ctxt->sax = sax;
12703 ctxt->userData = ctxt;
12704 }
12705 xmlDetectSAX2(ctxt);
12706
12707 /*
12708 * generate a parser input from the I/O handler
12709 */
12710
12711 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12712 if (pinput == NULL) {
12713 if (sax != NULL) ctxt->sax = NULL;
12714 xmlFreeParserInputBuffer(input);
12715 xmlFreeParserCtxt(ctxt);
12716 return(NULL);
12717 }
12718
12719 /*
12720 * plug some encoding conversion routines here.
12721 */
12722 if (xmlPushInput(ctxt, pinput) < 0) {
12723 if (sax != NULL) ctxt->sax = NULL;
12724 xmlFreeParserCtxt(ctxt);
12725 return(NULL);
12726 }
12727 if (enc != XML_CHAR_ENCODING_NONE) {
12728 xmlSwitchEncoding(ctxt, enc);
12729 }
12730
12731 pinput->filename = NULL;
12732 pinput->line = 1;
12733 pinput->col = 1;
12734 pinput->base = ctxt->input->cur;
12735 pinput->cur = ctxt->input->cur;
12736 pinput->free = NULL;
12737
12738 /*
12739 * let's parse that entity knowing it's an external subset.
12740 */
12741 ctxt->inSubset = 2;
12742 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12743 if (ctxt->myDoc == NULL) {
12744 xmlErrMemory(ctxt, "New Doc failed");
12745 return(NULL);
12746 }
12747 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12748 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12749 BAD_CAST "none", BAD_CAST "none");
12750
12751 if ((enc == XML_CHAR_ENCODING_NONE) &&
12752 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12753 /*
12754 * Get the 4 first bytes and decode the charset
12755 * if enc != XML_CHAR_ENCODING_NONE
12756 * plug some encoding conversion routines.
12757 */
12758 start[0] = RAW;
12759 start[1] = NXT(1);
12760 start[2] = NXT(2);
12761 start[3] = NXT(3);
12762 enc = xmlDetectCharEncoding(start, 4);
12763 if (enc != XML_CHAR_ENCODING_NONE) {
12764 xmlSwitchEncoding(ctxt, enc);
12765 }
12766 }
12767
12768 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12769
12770 if (ctxt->myDoc != NULL) {
12771 if (ctxt->wellFormed) {
12772 ret = ctxt->myDoc->extSubset;
12773 ctxt->myDoc->extSubset = NULL;
12774 if (ret != NULL) {
12775 xmlNodePtr tmp;
12776
12777 ret->doc = NULL;
12778 tmp = ret->children;
12779 while (tmp != NULL) {
12780 tmp->doc = NULL;
12781 tmp = tmp->next;
12782 }
12783 }
12784 } else {
12785 ret = NULL;
12786 }
12787 xmlFreeDoc(ctxt->myDoc);
12788 ctxt->myDoc = NULL;
12789 }
12790 if (sax != NULL) ctxt->sax = NULL;
12791 xmlFreeParserCtxt(ctxt);
12792
12793 return(ret);
12794 }
12795
12796 /**
12797 * xmlSAXParseDTD:
12798 * @sax: the SAX handler block
12799 * @ExternalID: a NAME* containing the External ID of the DTD
12800 * @SystemID: a NAME* containing the URL to the DTD
12801 *
12802 * Load and parse an external subset.
12803 *
12804 * Returns the resulting xmlDtdPtr or NULL in case of error.
12805 */
12806
12807 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12808 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12809 const xmlChar *SystemID) {
12810 xmlDtdPtr ret = NULL;
12811 xmlParserCtxtPtr ctxt;
12812 xmlParserInputPtr input = NULL;
12813 xmlCharEncoding enc;
12814 xmlChar* systemIdCanonic;
12815
12816 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12817
12818 ctxt = xmlNewParserCtxt();
12819 if (ctxt == NULL) {
12820 return(NULL);
12821 }
12822
12823 /* We are loading a DTD */
12824 ctxt->options |= XML_PARSE_DTDLOAD;
12825
12826 /*
12827 * Set-up the SAX context
12828 */
12829 if (sax != NULL) {
12830 if (ctxt->sax != NULL)
12831 xmlFree(ctxt->sax);
12832 ctxt->sax = sax;
12833 ctxt->userData = ctxt;
12834 }
12835
12836 /*
12837 * Canonicalise the system ID
12838 */
12839 systemIdCanonic = xmlCanonicPath(SystemID);
12840 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12841 xmlFreeParserCtxt(ctxt);
12842 return(NULL);
12843 }
12844
12845 /*
12846 * Ask the Entity resolver to load the damn thing
12847 */
12848
12849 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12850 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12851 systemIdCanonic);
12852 if (input == NULL) {
12853 if (sax != NULL) ctxt->sax = NULL;
12854 xmlFreeParserCtxt(ctxt);
12855 if (systemIdCanonic != NULL)
12856 xmlFree(systemIdCanonic);
12857 return(NULL);
12858 }
12859
12860 /*
12861 * plug some encoding conversion routines here.
12862 */
12863 if (xmlPushInput(ctxt, input) < 0) {
12864 if (sax != NULL) ctxt->sax = NULL;
12865 xmlFreeParserCtxt(ctxt);
12866 if (systemIdCanonic != NULL)
12867 xmlFree(systemIdCanonic);
12868 return(NULL);
12869 }
12870 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12871 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12872 xmlSwitchEncoding(ctxt, enc);
12873 }
12874
12875 if (input->filename == NULL)
12876 input->filename = (char *) systemIdCanonic;
12877 else
12878 xmlFree(systemIdCanonic);
12879 input->line = 1;
12880 input->col = 1;
12881 input->base = ctxt->input->cur;
12882 input->cur = ctxt->input->cur;
12883 input->free = NULL;
12884
12885 /*
12886 * let's parse that entity knowing it's an external subset.
12887 */
12888 ctxt->inSubset = 2;
12889 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12890 if (ctxt->myDoc == NULL) {
12891 xmlErrMemory(ctxt, "New Doc failed");
12892 if (sax != NULL) ctxt->sax = NULL;
12893 xmlFreeParserCtxt(ctxt);
12894 return(NULL);
12895 }
12896 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12897 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12898 ExternalID, SystemID);
12899 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12900
12901 if (ctxt->myDoc != NULL) {
12902 if (ctxt->wellFormed) {
12903 ret = ctxt->myDoc->extSubset;
12904 ctxt->myDoc->extSubset = NULL;
12905 if (ret != NULL) {
12906 xmlNodePtr tmp;
12907
12908 ret->doc = NULL;
12909 tmp = ret->children;
12910 while (tmp != NULL) {
12911 tmp->doc = NULL;
12912 tmp = tmp->next;
12913 }
12914 }
12915 } else {
12916 ret = NULL;
12917 }
12918 xmlFreeDoc(ctxt->myDoc);
12919 ctxt->myDoc = NULL;
12920 }
12921 if (sax != NULL) ctxt->sax = NULL;
12922 xmlFreeParserCtxt(ctxt);
12923
12924 return(ret);
12925 }
12926
12927
12928 /**
12929 * xmlParseDTD:
12930 * @ExternalID: a NAME* containing the External ID of the DTD
12931 * @SystemID: a NAME* containing the URL to the DTD
12932 *
12933 * Load and parse an external subset.
12934 *
12935 * Returns the resulting xmlDtdPtr or NULL in case of error.
12936 */
12937
12938 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12939 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12940 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12941 }
12942 #endif /* LIBXML_VALID_ENABLED */
12943
12944 /************************************************************************
12945 * *
12946 * Front ends when parsing an Entity *
12947 * *
12948 ************************************************************************/
12949
12950 /**
12951 * xmlParseCtxtExternalEntity:
12952 * @ctx: the existing parsing context
12953 * @URL: the URL for the entity to load
12954 * @ID: the System ID for the entity to load
12955 * @lst: the return value for the set of parsed nodes
12956 *
12957 * Parse an external general entity within an existing parsing context
12958 * An external general parsed entity is well-formed if it matches the
12959 * production labeled extParsedEnt.
12960 *
12961 * [78] extParsedEnt ::= TextDecl? content
12962 *
12963 * Returns 0 if the entity is well formed, -1 in case of args problem and
12964 * the parser error code otherwise
12965 */
12966
12967 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12969 const xmlChar *ID, xmlNodePtr *lst) {
12970 void *userData;
12971
12972 if (ctx == NULL) return(-1);
12973 /*
12974 * If the user provided their own SAX callbacks, then reuse the
12975 * userData callback field, otherwise the expected setup in a
12976 * DOM builder is to have userData == ctxt
12977 */
12978 if (ctx->userData == ctx)
12979 userData = NULL;
12980 else
12981 userData = ctx->userData;
12982 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12983 userData, ctx->depth + 1,
12984 URL, ID, lst);
12985 }
12986
12987 /**
12988 * xmlParseExternalEntityPrivate:
12989 * @doc: the document the chunk pertains to
12990 * @oldctxt: the previous parser context if available
12991 * @sax: the SAX handler block (possibly NULL)
12992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12993 * @depth: Used for loop detection, use 0
12994 * @URL: the URL for the entity to load
12995 * @ID: the System ID for the entity to load
12996 * @list: the return value for the set of parsed nodes
12997 *
12998 * Private version of xmlParseExternalEntity()
12999 *
13000 * Returns 0 if the entity is well formed, -1 in case of args problem and
13001 * the parser error code otherwise
13002 */
13003
13004 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13005 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13006 xmlSAXHandlerPtr sax,
13007 void *user_data, int depth, const xmlChar *URL,
13008 const xmlChar *ID, xmlNodePtr *list) {
13009 xmlParserCtxtPtr ctxt;
13010 xmlDocPtr newDoc;
13011 xmlNodePtr newRoot;
13012 xmlSAXHandlerPtr oldsax = NULL;
13013 xmlParserErrors ret = XML_ERR_OK;
13014 xmlChar start[4];
13015 xmlCharEncoding enc;
13016
13017 if (((depth > 40) &&
13018 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13019 (depth > 1024)) {
13020 return(XML_ERR_ENTITY_LOOP);
13021 }
13022
13023 if (list != NULL)
13024 *list = NULL;
13025 if ((URL == NULL) && (ID == NULL))
13026 return(XML_ERR_INTERNAL_ERROR);
13027 if (doc == NULL)
13028 return(XML_ERR_INTERNAL_ERROR);
13029
13030
13031 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13032 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13033 ctxt->userData = ctxt;
13034 if (sax != NULL) {
13035 oldsax = ctxt->sax;
13036 ctxt->sax = sax;
13037 if (user_data != NULL)
13038 ctxt->userData = user_data;
13039 }
13040 xmlDetectSAX2(ctxt);
13041 newDoc = xmlNewDoc(BAD_CAST "1.0");
13042 if (newDoc == NULL) {
13043 xmlFreeParserCtxt(ctxt);
13044 return(XML_ERR_INTERNAL_ERROR);
13045 }
13046 newDoc->properties = XML_DOC_INTERNAL;
13047 if (doc) {
13048 newDoc->intSubset = doc->intSubset;
13049 newDoc->extSubset = doc->extSubset;
13050 if (doc->dict) {
13051 newDoc->dict = doc->dict;
13052 xmlDictReference(newDoc->dict);
13053 }
13054 if (doc->URL != NULL) {
13055 newDoc->URL = xmlStrdup(doc->URL);
13056 }
13057 }
13058 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13059 if (newRoot == NULL) {
13060 if (sax != NULL)
13061 ctxt->sax = oldsax;
13062 xmlFreeParserCtxt(ctxt);
13063 newDoc->intSubset = NULL;
13064 newDoc->extSubset = NULL;
13065 xmlFreeDoc(newDoc);
13066 return(XML_ERR_INTERNAL_ERROR);
13067 }
13068 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13069 nodePush(ctxt, newDoc->children);
13070 if (doc == NULL) {
13071 ctxt->myDoc = newDoc;
13072 } else {
13073 ctxt->myDoc = doc;
13074 newRoot->doc = doc;
13075 }
13076
13077 /*
13078 * Get the 4 first bytes and decode the charset
13079 * if enc != XML_CHAR_ENCODING_NONE
13080 * plug some encoding conversion routines.
13081 */
13082 GROW;
13083 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13084 start[0] = RAW;
13085 start[1] = NXT(1);
13086 start[2] = NXT(2);
13087 start[3] = NXT(3);
13088 enc = xmlDetectCharEncoding(start, 4);
13089 if (enc != XML_CHAR_ENCODING_NONE) {
13090 xmlSwitchEncoding(ctxt, enc);
13091 }
13092 }
13093
13094 /*
13095 * Parse a possible text declaration first
13096 */
13097 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13098 xmlParseTextDecl(ctxt);
13099 /*
13100 * An XML-1.0 document can't reference an entity not XML-1.0
13101 */
13102 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13103 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13104 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13105 "Version mismatch between document and entity\n");
13106 }
13107 }
13108
13109 ctxt->instate = XML_PARSER_CONTENT;
13110 ctxt->depth = depth;
13111 if (oldctxt != NULL) {
13112 ctxt->_private = oldctxt->_private;
13113 ctxt->loadsubset = oldctxt->loadsubset;
13114 ctxt->validate = oldctxt->validate;
13115 ctxt->valid = oldctxt->valid;
13116 ctxt->replaceEntities = oldctxt->replaceEntities;
13117 if (oldctxt->validate) {
13118 ctxt->vctxt.error = oldctxt->vctxt.error;
13119 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13120 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13121 }
13122 ctxt->external = oldctxt->external;
13123 if (ctxt->dict) xmlDictFree(ctxt->dict);
13124 ctxt->dict = oldctxt->dict;
13125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13128 ctxt->dictNames = oldctxt->dictNames;
13129 ctxt->attsDefault = oldctxt->attsDefault;
13130 ctxt->attsSpecial = oldctxt->attsSpecial;
13131 ctxt->linenumbers = oldctxt->linenumbers;
13132 ctxt->record_info = oldctxt->record_info;
13133 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13134 ctxt->node_seq.length = oldctxt->node_seq.length;
13135 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13136 } else {
13137 /*
13138 * Doing validity checking on chunk without context
13139 * doesn't make sense
13140 */
13141 ctxt->_private = NULL;
13142 ctxt->validate = 0;
13143 ctxt->external = 2;
13144 ctxt->loadsubset = 0;
13145 }
13146
13147 xmlParseContent(ctxt);
13148
13149 if ((RAW == '<') && (NXT(1) == '/')) {
13150 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13151 } else if (RAW != 0) {
13152 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13153 }
13154 if (ctxt->node != newDoc->children) {
13155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156 }
13157
13158 if (!ctxt->wellFormed) {
13159 if (ctxt->errNo == 0)
13160 ret = XML_ERR_INTERNAL_ERROR;
13161 else
13162 ret = (xmlParserErrors)ctxt->errNo;
13163 } else {
13164 if (list != NULL) {
13165 xmlNodePtr cur;
13166
13167 /*
13168 * Return the newly created nodeset after unlinking it from
13169 * they pseudo parent.
13170 */
13171 cur = newDoc->children->children;
13172 *list = cur;
13173 while (cur != NULL) {
13174 cur->parent = NULL;
13175 cur = cur->next;
13176 }
13177 newDoc->children->children = NULL;
13178 }
13179 ret = XML_ERR_OK;
13180 }
13181
13182 /*
13183 * Record in the parent context the number of entities replacement
13184 * done when parsing that reference.
13185 */
13186 if (oldctxt != NULL)
13187 oldctxt->nbentities += ctxt->nbentities;
13188
13189 /*
13190 * Also record the size of the entity parsed
13191 */
13192 if (ctxt->input != NULL && oldctxt != NULL) {
13193 oldctxt->sizeentities += ctxt->input->consumed;
13194 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13195 }
13196 /*
13197 * And record the last error if any
13198 */
13199 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13200 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13201
13202 if (sax != NULL)
13203 ctxt->sax = oldsax;
13204 if (oldctxt != NULL) {
13205 ctxt->dict = NULL;
13206 ctxt->attsDefault = NULL;
13207 ctxt->attsSpecial = NULL;
13208 oldctxt->validate = ctxt->validate;
13209 oldctxt->valid = ctxt->valid;
13210 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13211 oldctxt->node_seq.length = ctxt->node_seq.length;
13212 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13213 }
13214 ctxt->node_seq.maximum = 0;
13215 ctxt->node_seq.length = 0;
13216 ctxt->node_seq.buffer = NULL;
13217 xmlFreeParserCtxt(ctxt);
13218 newDoc->intSubset = NULL;
13219 newDoc->extSubset = NULL;
13220 xmlFreeDoc(newDoc);
13221
13222 return(ret);
13223 }
13224
13225 #ifdef LIBXML_SAX1_ENABLED
13226 /**
13227 * xmlParseExternalEntity:
13228 * @doc: the document the chunk pertains to
13229 * @sax: the SAX handler block (possibly NULL)
13230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13231 * @depth: Used for loop detection, use 0
13232 * @URL: the URL for the entity to load
13233 * @ID: the System ID for the entity to load
13234 * @lst: the return value for the set of parsed nodes
13235 *
13236 * Parse an external general entity
13237 * An external general parsed entity is well-formed if it matches the
13238 * production labeled extParsedEnt.
13239 *
13240 * [78] extParsedEnt ::= TextDecl? content
13241 *
13242 * Returns 0 if the entity is well formed, -1 in case of args problem and
13243 * the parser error code otherwise
13244 */
13245
13246 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13247 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13248 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13249 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13250 ID, lst));
13251 }
13252
13253 /**
13254 * xmlParseBalancedChunkMemory:
13255 * @doc: the document the chunk pertains to (must not be NULL)
13256 * @sax: the SAX handler block (possibly NULL)
13257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13258 * @depth: Used for loop detection, use 0
13259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13260 * @lst: the return value for the set of parsed nodes
13261 *
13262 * Parse a well-balanced chunk of an XML document
13263 * called by the parser
13264 * The allowed sequence for the Well Balanced Chunk is the one defined by
13265 * the content production in the XML grammar:
13266 *
13267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13268 *
13269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270 * the parser error code otherwise
13271 */
13272
13273 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13274 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13275 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13276 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13277 depth, string, lst, 0 );
13278 }
13279 #endif /* LIBXML_SAX1_ENABLED */
13280
13281 /**
13282 * xmlParseBalancedChunkMemoryInternal:
13283 * @oldctxt: the existing parsing context
13284 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13285 * @user_data: the user data field for the parser context
13286 * @lst: the return value for the set of parsed nodes
13287 *
13288 *
13289 * Parse a well-balanced chunk of an XML document
13290 * called by the parser
13291 * The allowed sequence for the Well Balanced Chunk is the one defined by
13292 * the content production in the XML grammar:
13293 *
13294 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13295 *
13296 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297 * error code otherwise
13298 *
13299 * In case recover is set to 1, the nodelist will not be empty even if
13300 * the parsed chunk is not well balanced.
13301 */
13302 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13303 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13304 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13305 xmlParserCtxtPtr ctxt;
13306 xmlDocPtr newDoc = NULL;
13307 xmlNodePtr newRoot;
13308 xmlSAXHandlerPtr oldsax = NULL;
13309 xmlNodePtr content = NULL;
13310 xmlNodePtr last = NULL;
13311 int size;
13312 xmlParserErrors ret = XML_ERR_OK;
13313 #ifdef SAX2
13314 int i;
13315 #endif
13316
13317 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13318 (oldctxt->depth > 1024)) {
13319 return(XML_ERR_ENTITY_LOOP);
13320 }
13321
13322
13323 if (lst != NULL)
13324 *lst = NULL;
13325 if (string == NULL)
13326 return(XML_ERR_INTERNAL_ERROR);
13327
13328 size = xmlStrlen(string);
13329
13330 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13331 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13332 if (user_data != NULL)
13333 ctxt->userData = user_data;
13334 else
13335 ctxt->userData = ctxt;
13336 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13337 ctxt->dict = oldctxt->dict;
13338 ctxt->input_id = oldctxt->input_id + 1;
13339 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13340 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13341 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13342
13343 #ifdef SAX2
13344 /* propagate namespaces down the entity */
13345 for (i = 0;i < oldctxt->nsNr;i += 2) {
13346 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13347 }
13348 #endif
13349
13350 oldsax = ctxt->sax;
13351 ctxt->sax = oldctxt->sax;
13352 xmlDetectSAX2(ctxt);
13353 ctxt->replaceEntities = oldctxt->replaceEntities;
13354 ctxt->options = oldctxt->options;
13355
13356 ctxt->_private = oldctxt->_private;
13357 if (oldctxt->myDoc == NULL) {
13358 newDoc = xmlNewDoc(BAD_CAST "1.0");
13359 if (newDoc == NULL) {
13360 ctxt->sax = oldsax;
13361 ctxt->dict = NULL;
13362 xmlFreeParserCtxt(ctxt);
13363 return(XML_ERR_INTERNAL_ERROR);
13364 }
13365 newDoc->properties = XML_DOC_INTERNAL;
13366 newDoc->dict = ctxt->dict;
13367 xmlDictReference(newDoc->dict);
13368 ctxt->myDoc = newDoc;
13369 } else {
13370 ctxt->myDoc = oldctxt->myDoc;
13371 content = ctxt->myDoc->children;
13372 last = ctxt->myDoc->last;
13373 }
13374 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13375 if (newRoot == NULL) {
13376 ctxt->sax = oldsax;
13377 ctxt->dict = NULL;
13378 xmlFreeParserCtxt(ctxt);
13379 if (newDoc != NULL) {
13380 xmlFreeDoc(newDoc);
13381 }
13382 return(XML_ERR_INTERNAL_ERROR);
13383 }
13384 ctxt->myDoc->children = NULL;
13385 ctxt->myDoc->last = NULL;
13386 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13387 nodePush(ctxt, ctxt->myDoc->children);
13388 ctxt->instate = XML_PARSER_CONTENT;
13389 ctxt->depth = oldctxt->depth + 1;
13390
13391 ctxt->validate = 0;
13392 ctxt->loadsubset = oldctxt->loadsubset;
13393 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13394 /*
13395 * ID/IDREF registration will be done in xmlValidateElement below
13396 */
13397 ctxt->loadsubset |= XML_SKIP_IDS;
13398 }
13399 ctxt->dictNames = oldctxt->dictNames;
13400 ctxt->attsDefault = oldctxt->attsDefault;
13401 ctxt->attsSpecial = oldctxt->attsSpecial;
13402
13403 xmlParseContent(ctxt);
13404 if ((RAW == '<') && (NXT(1) == '/')) {
13405 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13406 } else if (RAW != 0) {
13407 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13408 }
13409 if (ctxt->node != ctxt->myDoc->children) {
13410 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13411 }
13412
13413 if (!ctxt->wellFormed) {
13414 if (ctxt->errNo == 0)
13415 ret = XML_ERR_INTERNAL_ERROR;
13416 else
13417 ret = (xmlParserErrors)ctxt->errNo;
13418 } else {
13419 ret = XML_ERR_OK;
13420 }
13421
13422 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13423 xmlNodePtr cur;
13424
13425 /*
13426 * Return the newly created nodeset after unlinking it from
13427 * they pseudo parent.
13428 */
13429 cur = ctxt->myDoc->children->children;
13430 *lst = cur;
13431 while (cur != NULL) {
13432 #ifdef LIBXML_VALID_ENABLED
13433 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13434 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13435 (cur->type == XML_ELEMENT_NODE)) {
13436 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13437 oldctxt->myDoc, cur);
13438 }
13439 #endif /* LIBXML_VALID_ENABLED */
13440 cur->parent = NULL;
13441 cur = cur->next;
13442 }
13443 ctxt->myDoc->children->children = NULL;
13444 }
13445 if (ctxt->myDoc != NULL) {
13446 xmlFreeNode(ctxt->myDoc->children);
13447 ctxt->myDoc->children = content;
13448 ctxt->myDoc->last = last;
13449 }
13450
13451 /*
13452 * Record in the parent context the number of entities replacement
13453 * done when parsing that reference.
13454 */
13455 if (oldctxt != NULL)
13456 oldctxt->nbentities += ctxt->nbentities;
13457
13458 /*
13459 * Also record the last error if any
13460 */
13461 if (ctxt->lastError.code != XML_ERR_OK)
13462 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13463
13464 ctxt->sax = oldsax;
13465 ctxt->dict = NULL;
13466 ctxt->attsDefault = NULL;
13467 ctxt->attsSpecial = NULL;
13468 xmlFreeParserCtxt(ctxt);
13469 if (newDoc != NULL) {
13470 xmlFreeDoc(newDoc);
13471 }
13472
13473 return(ret);
13474 }
13475
13476 /**
13477 * xmlParseInNodeContext:
13478 * @node: the context node
13479 * @data: the input string
13480 * @datalen: the input string length in bytes
13481 * @options: a combination of xmlParserOption
13482 * @lst: the return value for the set of parsed nodes
13483 *
13484 * Parse a well-balanced chunk of an XML document
13485 * within the context (DTD, namespaces, etc ...) of the given node.
13486 *
13487 * The allowed sequence for the data is a Well Balanced Chunk defined by
13488 * the content production in the XML grammar:
13489 *
13490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13491 *
13492 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493 * error code otherwise
13494 */
13495 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13496 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13497 int options, xmlNodePtr *lst) {
13498 #ifdef SAX2
13499 xmlParserCtxtPtr ctxt;
13500 xmlDocPtr doc = NULL;
13501 xmlNodePtr fake, cur;
13502 int nsnr = 0;
13503
13504 xmlParserErrors ret = XML_ERR_OK;
13505
13506 /*
13507 * check all input parameters, grab the document
13508 */
13509 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13510 return(XML_ERR_INTERNAL_ERROR);
13511 switch (node->type) {
13512 case XML_ELEMENT_NODE:
13513 case XML_ATTRIBUTE_NODE:
13514 case XML_TEXT_NODE:
13515 case XML_CDATA_SECTION_NODE:
13516 case XML_ENTITY_REF_NODE:
13517 case XML_PI_NODE:
13518 case XML_COMMENT_NODE:
13519 case XML_DOCUMENT_NODE:
13520 case XML_HTML_DOCUMENT_NODE:
13521 break;
13522 default:
13523 return(XML_ERR_INTERNAL_ERROR);
13524
13525 }
13526 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13527 (node->type != XML_DOCUMENT_NODE) &&
13528 (node->type != XML_HTML_DOCUMENT_NODE))
13529 node = node->parent;
13530 if (node == NULL)
13531 return(XML_ERR_INTERNAL_ERROR);
13532 if (node->type == XML_ELEMENT_NODE)
13533 doc = node->doc;
13534 else
13535 doc = (xmlDocPtr) node;
13536 if (doc == NULL)
13537 return(XML_ERR_INTERNAL_ERROR);
13538
13539 /*
13540 * allocate a context and set-up everything not related to the
13541 * node position in the tree
13542 */
13543 if (doc->type == XML_DOCUMENT_NODE)
13544 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13545 #ifdef LIBXML_HTML_ENABLED
13546 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13547 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13548 /*
13549 * When parsing in context, it makes no sense to add implied
13550 * elements like html/body/etc...
13551 */
13552 options |= HTML_PARSE_NOIMPLIED;
13553 }
13554 #endif
13555 else
13556 return(XML_ERR_INTERNAL_ERROR);
13557
13558 if (ctxt == NULL)
13559 return(XML_ERR_NO_MEMORY);
13560
13561 /*
13562 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564 * we must wait until the last moment to free the original one.
13565 */
13566 if (doc->dict != NULL) {
13567 if (ctxt->dict != NULL)
13568 xmlDictFree(ctxt->dict);
13569 ctxt->dict = doc->dict;
13570 } else
13571 options |= XML_PARSE_NODICT;
13572
13573 if (doc->encoding != NULL) {
13574 xmlCharEncodingHandlerPtr hdlr;
13575
13576 if (ctxt->encoding != NULL)
13577 xmlFree((xmlChar *) ctxt->encoding);
13578 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13579
13580 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13581 if (hdlr != NULL) {
13582 xmlSwitchToEncoding(ctxt, hdlr);
13583 } else {
13584 return(XML_ERR_UNSUPPORTED_ENCODING);
13585 }
13586 }
13587
13588 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13589 xmlDetectSAX2(ctxt);
13590 ctxt->myDoc = doc;
13591 /* parsing in context, i.e. as within existing content */
13592 ctxt->input_id = 2;
13593 ctxt->instate = XML_PARSER_CONTENT;
13594
13595 fake = xmlNewDocComment(node->doc, NULL);
13596 if (fake == NULL) {
13597 xmlFreeParserCtxt(ctxt);
13598 return(XML_ERR_NO_MEMORY);
13599 }
13600 xmlAddChild(node, fake);
13601
13602 if (node->type == XML_ELEMENT_NODE) {
13603 nodePush(ctxt, node);
13604 /*
13605 * initialize the SAX2 namespaces stack
13606 */
13607 cur = node;
13608 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13609 xmlNsPtr ns = cur->nsDef;
13610 const xmlChar *iprefix, *ihref;
13611
13612 while (ns != NULL) {
13613 if (ctxt->dict) {
13614 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13615 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13616 } else {
13617 iprefix = ns->prefix;
13618 ihref = ns->href;
13619 }
13620
13621 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13622 nsPush(ctxt, iprefix, ihref);
13623 nsnr++;
13624 }
13625 ns = ns->next;
13626 }
13627 cur = cur->parent;
13628 }
13629 }
13630
13631 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13632 /*
13633 * ID/IDREF registration will be done in xmlValidateElement below
13634 */
13635 ctxt->loadsubset |= XML_SKIP_IDS;
13636 }
13637
13638 #ifdef LIBXML_HTML_ENABLED
13639 if (doc->type == XML_HTML_DOCUMENT_NODE)
13640 __htmlParseContent(ctxt);
13641 else
13642 #endif
13643 xmlParseContent(ctxt);
13644
13645 nsPop(ctxt, nsnr);
13646 if ((RAW == '<') && (NXT(1) == '/')) {
13647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13648 } else if (RAW != 0) {
13649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13650 }
13651 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653 ctxt->wellFormed = 0;
13654 }
13655
13656 if (!ctxt->wellFormed) {
13657 if (ctxt->errNo == 0)
13658 ret = XML_ERR_INTERNAL_ERROR;
13659 else
13660 ret = (xmlParserErrors)ctxt->errNo;
13661 } else {
13662 ret = XML_ERR_OK;
13663 }
13664
13665 /*
13666 * Return the newly created nodeset after unlinking it from
13667 * the pseudo sibling.
13668 */
13669
13670 cur = fake->next;
13671 fake->next = NULL;
13672 node->last = fake;
13673
13674 if (cur != NULL) {
13675 cur->prev = NULL;
13676 }
13677
13678 *lst = cur;
13679
13680 while (cur != NULL) {
13681 cur->parent = NULL;
13682 cur = cur->next;
13683 }
13684
13685 xmlUnlinkNode(fake);
13686 xmlFreeNode(fake);
13687
13688
13689 if (ret != XML_ERR_OK) {
13690 xmlFreeNodeList(*lst);
13691 *lst = NULL;
13692 }
13693
13694 if (doc->dict != NULL)
13695 ctxt->dict = NULL;
13696 xmlFreeParserCtxt(ctxt);
13697
13698 return(ret);
13699 #else /* !SAX2 */
13700 return(XML_ERR_INTERNAL_ERROR);
13701 #endif
13702 }
13703
13704 #ifdef LIBXML_SAX1_ENABLED
13705 /**
13706 * xmlParseBalancedChunkMemoryRecover:
13707 * @doc: the document the chunk pertains to (must not be NULL)
13708 * @sax: the SAX handler block (possibly NULL)
13709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13710 * @depth: Used for loop detection, use 0
13711 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13712 * @lst: the return value for the set of parsed nodes
13713 * @recover: return nodes even if the data is broken (use 0)
13714 *
13715 *
13716 * Parse a well-balanced chunk of an XML document
13717 * called by the parser
13718 * The allowed sequence for the Well Balanced Chunk is the one defined by
13719 * the content production in the XML grammar:
13720 *
13721 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13722 *
13723 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13724 * the parser error code otherwise
13725 *
13726 * In case recover is set to 1, the nodelist will not be empty even if
13727 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13728 * some extent.
13729 */
13730 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13731 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13732 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13733 int recover) {
13734 xmlParserCtxtPtr ctxt;
13735 xmlDocPtr newDoc;
13736 xmlSAXHandlerPtr oldsax = NULL;
13737 xmlNodePtr content, newRoot;
13738 int size;
13739 int ret = 0;
13740
13741 if (depth > 40) {
13742 return(XML_ERR_ENTITY_LOOP);
13743 }
13744
13745
13746 if (lst != NULL)
13747 *lst = NULL;
13748 if (string == NULL)
13749 return(-1);
13750
13751 size = xmlStrlen(string);
13752
13753 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13754 if (ctxt == NULL) return(-1);
13755 ctxt->userData = ctxt;
13756 if (sax != NULL) {
13757 oldsax = ctxt->sax;
13758 ctxt->sax = sax;
13759 if (user_data != NULL)
13760 ctxt->userData = user_data;
13761 }
13762 newDoc = xmlNewDoc(BAD_CAST "1.0");
13763 if (newDoc == NULL) {
13764 xmlFreeParserCtxt(ctxt);
13765 return(-1);
13766 }
13767 newDoc->properties = XML_DOC_INTERNAL;
13768 if ((doc != NULL) && (doc->dict != NULL)) {
13769 xmlDictFree(ctxt->dict);
13770 ctxt->dict = doc->dict;
13771 xmlDictReference(ctxt->dict);
13772 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13773 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13774 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13775 ctxt->dictNames = 1;
13776 } else {
13777 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13778 }
13779 /* doc == NULL is only supported for historic reasons */
13780 if (doc != NULL) {
13781 newDoc->intSubset = doc->intSubset;
13782 newDoc->extSubset = doc->extSubset;
13783 }
13784 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13785 if (newRoot == NULL) {
13786 if (sax != NULL)
13787 ctxt->sax = oldsax;
13788 xmlFreeParserCtxt(ctxt);
13789 newDoc->intSubset = NULL;
13790 newDoc->extSubset = NULL;
13791 xmlFreeDoc(newDoc);
13792 return(-1);
13793 }
13794 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13795 nodePush(ctxt, newRoot);
13796 /* doc == NULL is only supported for historic reasons */
13797 if (doc == NULL) {
13798 ctxt->myDoc = newDoc;
13799 } else {
13800 ctxt->myDoc = newDoc;
13801 newDoc->children->doc = doc;
13802 /* Ensure that doc has XML spec namespace */
13803 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13804 newDoc->oldNs = doc->oldNs;
13805 }
13806 ctxt->instate = XML_PARSER_CONTENT;
13807 ctxt->input_id = 2;
13808 ctxt->depth = depth;
13809
13810 /*
13811 * Doing validity checking on chunk doesn't make sense
13812 */
13813 ctxt->validate = 0;
13814 ctxt->loadsubset = 0;
13815 xmlDetectSAX2(ctxt);
13816
13817 if ( doc != NULL ){
13818 content = doc->children;
13819 doc->children = NULL;
13820 xmlParseContent(ctxt);
13821 doc->children = content;
13822 }
13823 else {
13824 xmlParseContent(ctxt);
13825 }
13826 if ((RAW == '<') && (NXT(1) == '/')) {
13827 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13828 } else if (RAW != 0) {
13829 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13830 }
13831 if (ctxt->node != newDoc->children) {
13832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13833 }
13834
13835 if (!ctxt->wellFormed) {
13836 if (ctxt->errNo == 0)
13837 ret = 1;
13838 else
13839 ret = ctxt->errNo;
13840 } else {
13841 ret = 0;
13842 }
13843
13844 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13845 xmlNodePtr cur;
13846
13847 /*
13848 * Return the newly created nodeset after unlinking it from
13849 * they pseudo parent.
13850 */
13851 cur = newDoc->children->children;
13852 *lst = cur;
13853 while (cur != NULL) {
13854 xmlSetTreeDoc(cur, doc);
13855 cur->parent = NULL;
13856 cur = cur->next;
13857 }
13858 newDoc->children->children = NULL;
13859 }
13860
13861 if (sax != NULL)
13862 ctxt->sax = oldsax;
13863 xmlFreeParserCtxt(ctxt);
13864 newDoc->intSubset = NULL;
13865 newDoc->extSubset = NULL;
13866 /* This leaks the namespace list if doc == NULL */
13867 newDoc->oldNs = NULL;
13868 xmlFreeDoc(newDoc);
13869
13870 return(ret);
13871 }
13872
13873 /**
13874 * xmlSAXParseEntity:
13875 * @sax: the SAX handler block
13876 * @filename: the filename
13877 *
13878 * parse an XML external entity out of context and build a tree.
13879 * It use the given SAX function block to handle the parsing callback.
13880 * If sax is NULL, fallback to the default DOM tree building routines.
13881 *
13882 * [78] extParsedEnt ::= TextDecl? content
13883 *
13884 * This correspond to a "Well Balanced" chunk
13885 *
13886 * Returns the resulting document tree
13887 */
13888
13889 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13890 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13891 xmlDocPtr ret;
13892 xmlParserCtxtPtr ctxt;
13893
13894 ctxt = xmlCreateFileParserCtxt(filename);
13895 if (ctxt == NULL) {
13896 return(NULL);
13897 }
13898 if (sax != NULL) {
13899 if (ctxt->sax != NULL)
13900 xmlFree(ctxt->sax);
13901 ctxt->sax = sax;
13902 ctxt->userData = NULL;
13903 }
13904
13905 xmlParseExtParsedEnt(ctxt);
13906
13907 if (ctxt->wellFormed)
13908 ret = ctxt->myDoc;
13909 else {
13910 ret = NULL;
13911 xmlFreeDoc(ctxt->myDoc);
13912 ctxt->myDoc = NULL;
13913 }
13914 if (sax != NULL)
13915 ctxt->sax = NULL;
13916 xmlFreeParserCtxt(ctxt);
13917
13918 return(ret);
13919 }
13920
13921 /**
13922 * xmlParseEntity:
13923 * @filename: the filename
13924 *
13925 * parse an XML external entity out of context and build a tree.
13926 *
13927 * [78] extParsedEnt ::= TextDecl? content
13928 *
13929 * This correspond to a "Well Balanced" chunk
13930 *
13931 * Returns the resulting document tree
13932 */
13933
13934 xmlDocPtr
xmlParseEntity(const char * filename)13935 xmlParseEntity(const char *filename) {
13936 return(xmlSAXParseEntity(NULL, filename));
13937 }
13938 #endif /* LIBXML_SAX1_ENABLED */
13939
13940 /**
13941 * xmlCreateEntityParserCtxtInternal:
13942 * @URL: the entity URL
13943 * @ID: the entity PUBLIC ID
13944 * @base: a possible base for the target URI
13945 * @pctx: parser context used to set options on new context
13946 *
13947 * Create a parser context for an external entity
13948 * Automatic support for ZLIB/Compress compressed document is provided
13949 * by default if found at compile-time.
13950 *
13951 * Returns the new parser context or NULL
13952 */
13953 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13954 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13955 const xmlChar *base, xmlParserCtxtPtr pctx) {
13956 xmlParserCtxtPtr ctxt;
13957 xmlParserInputPtr inputStream;
13958 char *directory = NULL;
13959 xmlChar *uri;
13960
13961 ctxt = xmlNewParserCtxt();
13962 if (ctxt == NULL) {
13963 return(NULL);
13964 }
13965
13966 if (pctx != NULL) {
13967 ctxt->options = pctx->options;
13968 ctxt->_private = pctx->_private;
13969 /*
13970 * this is a subparser of pctx, so the input_id should be
13971 * incremented to distinguish from main entity
13972 */
13973 ctxt->input_id = pctx->input_id + 1;
13974 }
13975
13976 /* Don't read from stdin. */
13977 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13978 URL = BAD_CAST "./-";
13979
13980 uri = xmlBuildURI(URL, base);
13981
13982 if (uri == NULL) {
13983 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13984 if (inputStream == NULL) {
13985 xmlFreeParserCtxt(ctxt);
13986 return(NULL);
13987 }
13988
13989 inputPush(ctxt, inputStream);
13990
13991 if ((ctxt->directory == NULL) && (directory == NULL))
13992 directory = xmlParserGetDirectory((char *)URL);
13993 if ((ctxt->directory == NULL) && (directory != NULL))
13994 ctxt->directory = directory;
13995 } else {
13996 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13997 if (inputStream == NULL) {
13998 xmlFree(uri);
13999 xmlFreeParserCtxt(ctxt);
14000 return(NULL);
14001 }
14002
14003 inputPush(ctxt, inputStream);
14004
14005 if ((ctxt->directory == NULL) && (directory == NULL))
14006 directory = xmlParserGetDirectory((char *)uri);
14007 if ((ctxt->directory == NULL) && (directory != NULL))
14008 ctxt->directory = directory;
14009 xmlFree(uri);
14010 }
14011 return(ctxt);
14012 }
14013
14014 /**
14015 * xmlCreateEntityParserCtxt:
14016 * @URL: the entity URL
14017 * @ID: the entity PUBLIC ID
14018 * @base: a possible base for the target URI
14019 *
14020 * Create a parser context for an external entity
14021 * Automatic support for ZLIB/Compress compressed document is provided
14022 * by default if found at compile-time.
14023 *
14024 * Returns the new parser context or NULL
14025 */
14026 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14027 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14028 const xmlChar *base) {
14029 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14030
14031 }
14032
14033 /************************************************************************
14034 * *
14035 * Front ends when parsing from a file *
14036 * *
14037 ************************************************************************/
14038
14039 /**
14040 * xmlCreateURLParserCtxt:
14041 * @filename: the filename or URL
14042 * @options: a combination of xmlParserOption
14043 *
14044 * Create a parser context for a file or URL content.
14045 * Automatic support for ZLIB/Compress compressed document is provided
14046 * by default if found at compile-time and for file accesses
14047 *
14048 * Returns the new parser context or NULL
14049 */
14050 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14051 xmlCreateURLParserCtxt(const char *filename, int options)
14052 {
14053 xmlParserCtxtPtr ctxt;
14054 xmlParserInputPtr inputStream;
14055 char *directory = NULL;
14056
14057 ctxt = xmlNewParserCtxt();
14058 if (ctxt == NULL) {
14059 xmlErrMemory(NULL, "cannot allocate parser context");
14060 return(NULL);
14061 }
14062
14063 if (options)
14064 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14065 ctxt->linenumbers = 1;
14066
14067 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14068 if (inputStream == NULL) {
14069 xmlFreeParserCtxt(ctxt);
14070 return(NULL);
14071 }
14072
14073 inputPush(ctxt, inputStream);
14074 if ((ctxt->directory == NULL) && (directory == NULL))
14075 directory = xmlParserGetDirectory(filename);
14076 if ((ctxt->directory == NULL) && (directory != NULL))
14077 ctxt->directory = directory;
14078
14079 return(ctxt);
14080 }
14081
14082 /**
14083 * xmlCreateFileParserCtxt:
14084 * @filename: the filename
14085 *
14086 * Create a parser context for a file content.
14087 * Automatic support for ZLIB/Compress compressed document is provided
14088 * by default if found at compile-time.
14089 *
14090 * Returns the new parser context or NULL
14091 */
14092 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14093 xmlCreateFileParserCtxt(const char *filename)
14094 {
14095 return(xmlCreateURLParserCtxt(filename, 0));
14096 }
14097
14098 #ifdef LIBXML_SAX1_ENABLED
14099 /**
14100 * xmlSAXParseFileWithData:
14101 * @sax: the SAX handler block
14102 * @filename: the filename
14103 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14104 * documents
14105 * @data: the userdata
14106 *
14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108 * compressed document is provided by default if found at compile-time.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14111 *
14112 * User data (void *) is stored within the parser context in the
14113 * context's _private member, so it is available nearly everywhere in libxml
14114 *
14115 * Returns the resulting document tree
14116 */
14117
14118 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14119 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14120 int recovery, void *data) {
14121 xmlDocPtr ret;
14122 xmlParserCtxtPtr ctxt;
14123
14124 xmlInitParser();
14125
14126 ctxt = xmlCreateFileParserCtxt(filename);
14127 if (ctxt == NULL) {
14128 return(NULL);
14129 }
14130 if (sax != NULL) {
14131 if (ctxt->sax != NULL)
14132 xmlFree(ctxt->sax);
14133 ctxt->sax = sax;
14134 }
14135 xmlDetectSAX2(ctxt);
14136 if (data!=NULL) {
14137 ctxt->_private = data;
14138 }
14139
14140 if (ctxt->directory == NULL)
14141 ctxt->directory = xmlParserGetDirectory(filename);
14142
14143 ctxt->recovery = recovery;
14144
14145 xmlParseDocument(ctxt);
14146
14147 if ((ctxt->wellFormed) || recovery) {
14148 ret = ctxt->myDoc;
14149 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14150 if (ctxt->input->buf->compressed > 0)
14151 ret->compression = 9;
14152 else
14153 ret->compression = ctxt->input->buf->compressed;
14154 }
14155 }
14156 else {
14157 ret = NULL;
14158 xmlFreeDoc(ctxt->myDoc);
14159 ctxt->myDoc = NULL;
14160 }
14161 if (sax != NULL)
14162 ctxt->sax = NULL;
14163 xmlFreeParserCtxt(ctxt);
14164
14165 return(ret);
14166 }
14167
14168 /**
14169 * xmlSAXParseFile:
14170 * @sax: the SAX handler block
14171 * @filename: the filename
14172 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14173 * documents
14174 *
14175 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176 * compressed document is provided by default if found at compile-time.
14177 * It use the given SAX function block to handle the parsing callback.
14178 * If sax is NULL, fallback to the default DOM tree building routines.
14179 *
14180 * Returns the resulting document tree
14181 */
14182
14183 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14184 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14185 int recovery) {
14186 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14187 }
14188
14189 /**
14190 * xmlRecoverDoc:
14191 * @cur: a pointer to an array of xmlChar
14192 *
14193 * parse an XML in-memory document and build a tree.
14194 * In the case the document is not Well Formed, a attempt to build a
14195 * tree is tried anyway
14196 *
14197 * Returns the resulting document tree or NULL in case of failure
14198 */
14199
14200 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14201 xmlRecoverDoc(const xmlChar *cur) {
14202 return(xmlSAXParseDoc(NULL, cur, 1));
14203 }
14204
14205 /**
14206 * xmlParseFile:
14207 * @filename: the filename
14208 *
14209 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210 * compressed document is provided by default if found at compile-time.
14211 *
14212 * Returns the resulting document tree if the file was wellformed,
14213 * NULL otherwise.
14214 */
14215
14216 xmlDocPtr
xmlParseFile(const char * filename)14217 xmlParseFile(const char *filename) {
14218 return(xmlSAXParseFile(NULL, filename, 0));
14219 }
14220
14221 /**
14222 * xmlRecoverFile:
14223 * @filename: the filename
14224 *
14225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226 * compressed document is provided by default if found at compile-time.
14227 * In the case the document is not Well Formed, it attempts to build
14228 * a tree anyway
14229 *
14230 * Returns the resulting document tree or NULL in case of failure
14231 */
14232
14233 xmlDocPtr
xmlRecoverFile(const char * filename)14234 xmlRecoverFile(const char *filename) {
14235 return(xmlSAXParseFile(NULL, filename, 1));
14236 }
14237
14238
14239 /**
14240 * xmlSetupParserForBuffer:
14241 * @ctxt: an XML parser context
14242 * @buffer: a xmlChar * buffer
14243 * @filename: a file name
14244 *
14245 * Setup the parser context to parse a new buffer; Clears any prior
14246 * contents from the parser context. The buffer parameter must not be
14247 * NULL, but the filename parameter can be
14248 */
14249 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14250 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14251 const char* filename)
14252 {
14253 xmlParserInputPtr input;
14254
14255 if ((ctxt == NULL) || (buffer == NULL))
14256 return;
14257
14258 input = xmlNewInputStream(ctxt);
14259 if (input == NULL) {
14260 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14261 xmlClearParserCtxt(ctxt);
14262 return;
14263 }
14264
14265 xmlClearParserCtxt(ctxt);
14266 if (filename != NULL)
14267 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14268 input->base = buffer;
14269 input->cur = buffer;
14270 input->end = &buffer[xmlStrlen(buffer)];
14271 inputPush(ctxt, input);
14272 }
14273
14274 /**
14275 * xmlSAXUserParseFile:
14276 * @sax: a SAX handler
14277 * @user_data: The user data returned on SAX callbacks
14278 * @filename: a file name
14279 *
14280 * parse an XML file and call the given SAX handler routines.
14281 * Automatic support for ZLIB/Compress compressed document is provided
14282 *
14283 * Returns 0 in case of success or a error number otherwise
14284 */
14285 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14286 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14287 const char *filename) {
14288 int ret = 0;
14289 xmlParserCtxtPtr ctxt;
14290
14291 ctxt = xmlCreateFileParserCtxt(filename);
14292 if (ctxt == NULL) return -1;
14293 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14294 xmlFree(ctxt->sax);
14295 ctxt->sax = sax;
14296 xmlDetectSAX2(ctxt);
14297
14298 if (user_data != NULL)
14299 ctxt->userData = user_data;
14300
14301 xmlParseDocument(ctxt);
14302
14303 if (ctxt->wellFormed)
14304 ret = 0;
14305 else {
14306 if (ctxt->errNo != 0)
14307 ret = ctxt->errNo;
14308 else
14309 ret = -1;
14310 }
14311 if (sax != NULL)
14312 ctxt->sax = NULL;
14313 if (ctxt->myDoc != NULL) {
14314 xmlFreeDoc(ctxt->myDoc);
14315 ctxt->myDoc = NULL;
14316 }
14317 xmlFreeParserCtxt(ctxt);
14318
14319 return ret;
14320 }
14321 #endif /* LIBXML_SAX1_ENABLED */
14322
14323 /************************************************************************
14324 * *
14325 * Front ends when parsing from memory *
14326 * *
14327 ************************************************************************/
14328
14329 /**
14330 * xmlCreateMemoryParserCtxt:
14331 * @buffer: a pointer to a char array
14332 * @size: the size of the array
14333 *
14334 * Create a parser context for an XML in-memory document.
14335 *
14336 * Returns the new parser context or NULL
14337 */
14338 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14339 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14340 xmlParserCtxtPtr ctxt;
14341 xmlParserInputPtr input;
14342 xmlParserInputBufferPtr buf;
14343
14344 if (buffer == NULL)
14345 return(NULL);
14346 if (size <= 0)
14347 return(NULL);
14348
14349 ctxt = xmlNewParserCtxt();
14350 if (ctxt == NULL)
14351 return(NULL);
14352
14353 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14354 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14355 if (buf == NULL) {
14356 xmlFreeParserCtxt(ctxt);
14357 return(NULL);
14358 }
14359
14360 input = xmlNewInputStream(ctxt);
14361 if (input == NULL) {
14362 xmlFreeParserInputBuffer(buf);
14363 xmlFreeParserCtxt(ctxt);
14364 return(NULL);
14365 }
14366
14367 input->filename = NULL;
14368 input->buf = buf;
14369 xmlBufResetInput(input->buf->buffer, input);
14370
14371 inputPush(ctxt, input);
14372 return(ctxt);
14373 }
14374
14375 #ifdef LIBXML_SAX1_ENABLED
14376 /**
14377 * xmlSAXParseMemoryWithData:
14378 * @sax: the SAX handler block
14379 * @buffer: an pointer to a char array
14380 * @size: the size of the array
14381 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14382 * documents
14383 * @data: the userdata
14384 *
14385 * parse an XML in-memory block and use the given SAX function block
14386 * to handle the parsing callback. If sax is NULL, fallback to the default
14387 * DOM tree building routines.
14388 *
14389 * User data (void *) is stored within the parser context in the
14390 * context's _private member, so it is available nearly everywhere in libxml
14391 *
14392 * Returns the resulting document tree
14393 */
14394
14395 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14396 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14397 int size, int recovery, void *data) {
14398 xmlDocPtr ret;
14399 xmlParserCtxtPtr ctxt;
14400
14401 xmlInitParser();
14402
14403 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14404 if (ctxt == NULL) return(NULL);
14405 if (sax != NULL) {
14406 if (ctxt->sax != NULL)
14407 xmlFree(ctxt->sax);
14408 ctxt->sax = sax;
14409 }
14410 xmlDetectSAX2(ctxt);
14411 if (data!=NULL) {
14412 ctxt->_private=data;
14413 }
14414
14415 ctxt->recovery = recovery;
14416
14417 xmlParseDocument(ctxt);
14418
14419 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14420 else {
14421 ret = NULL;
14422 xmlFreeDoc(ctxt->myDoc);
14423 ctxt->myDoc = NULL;
14424 }
14425 if (sax != NULL)
14426 ctxt->sax = NULL;
14427 xmlFreeParserCtxt(ctxt);
14428
14429 return(ret);
14430 }
14431
14432 /**
14433 * xmlSAXParseMemory:
14434 * @sax: the SAX handler block
14435 * @buffer: an pointer to a char array
14436 * @size: the size of the array
14437 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14438 * documents
14439 *
14440 * parse an XML in-memory block and use the given SAX function block
14441 * to handle the parsing callback. If sax is NULL, fallback to the default
14442 * DOM tree building routines.
14443 *
14444 * Returns the resulting document tree
14445 */
14446 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14447 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14448 int size, int recovery) {
14449 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14450 }
14451
14452 /**
14453 * xmlParseMemory:
14454 * @buffer: an pointer to a char array
14455 * @size: the size of the array
14456 *
14457 * parse an XML in-memory block and build a tree.
14458 *
14459 * Returns the resulting document tree
14460 */
14461
xmlParseMemory(const char * buffer,int size)14462 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14463 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14464 }
14465
14466 /**
14467 * xmlRecoverMemory:
14468 * @buffer: an pointer to a char array
14469 * @size: the size of the array
14470 *
14471 * parse an XML in-memory block and build a tree.
14472 * In the case the document is not Well Formed, an attempt to
14473 * build a tree is tried anyway
14474 *
14475 * Returns the resulting document tree or NULL in case of error
14476 */
14477
xmlRecoverMemory(const char * buffer,int size)14478 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14479 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14480 }
14481
14482 /**
14483 * xmlSAXUserParseMemory:
14484 * @sax: a SAX handler
14485 * @user_data: The user data returned on SAX callbacks
14486 * @buffer: an in-memory XML document input
14487 * @size: the length of the XML document in bytes
14488 *
14489 * A better SAX parsing routine.
14490 * parse an XML in-memory buffer and call the given SAX handler routines.
14491 *
14492 * Returns 0 in case of success or a error number otherwise
14493 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14494 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14495 const char *buffer, int size) {
14496 int ret = 0;
14497 xmlParserCtxtPtr ctxt;
14498
14499 xmlInitParser();
14500
14501 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14502 if (ctxt == NULL) return -1;
14503 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14504 xmlFree(ctxt->sax);
14505 ctxt->sax = sax;
14506 xmlDetectSAX2(ctxt);
14507
14508 if (user_data != NULL)
14509 ctxt->userData = user_data;
14510
14511 xmlParseDocument(ctxt);
14512
14513 if (ctxt->wellFormed)
14514 ret = 0;
14515 else {
14516 if (ctxt->errNo != 0)
14517 ret = ctxt->errNo;
14518 else
14519 ret = -1;
14520 }
14521 if (sax != NULL)
14522 ctxt->sax = NULL;
14523 if (ctxt->myDoc != NULL) {
14524 xmlFreeDoc(ctxt->myDoc);
14525 ctxt->myDoc = NULL;
14526 }
14527 xmlFreeParserCtxt(ctxt);
14528
14529 return ret;
14530 }
14531 #endif /* LIBXML_SAX1_ENABLED */
14532
14533 /**
14534 * xmlCreateDocParserCtxt:
14535 * @cur: a pointer to an array of xmlChar
14536 *
14537 * Creates a parser context for an XML in-memory document.
14538 *
14539 * Returns the new parser context or NULL
14540 */
14541 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14542 xmlCreateDocParserCtxt(const xmlChar *cur) {
14543 int len;
14544
14545 if (cur == NULL)
14546 return(NULL);
14547 len = xmlStrlen(cur);
14548 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14549 }
14550
14551 #ifdef LIBXML_SAX1_ENABLED
14552 /**
14553 * xmlSAXParseDoc:
14554 * @sax: the SAX handler block
14555 * @cur: a pointer to an array of xmlChar
14556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14557 * documents
14558 *
14559 * parse an XML in-memory document and build a tree.
14560 * It use the given SAX function block to handle the parsing callback.
14561 * If sax is NULL, fallback to the default DOM tree building routines.
14562 *
14563 * Returns the resulting document tree
14564 */
14565
14566 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14567 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14568 xmlDocPtr ret;
14569 xmlParserCtxtPtr ctxt;
14570 xmlSAXHandlerPtr oldsax = NULL;
14571
14572 if (cur == NULL) return(NULL);
14573
14574
14575 ctxt = xmlCreateDocParserCtxt(cur);
14576 if (ctxt == NULL) return(NULL);
14577 if (sax != NULL) {
14578 oldsax = ctxt->sax;
14579 ctxt->sax = sax;
14580 ctxt->userData = NULL;
14581 }
14582 xmlDetectSAX2(ctxt);
14583
14584 xmlParseDocument(ctxt);
14585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14586 else {
14587 ret = NULL;
14588 xmlFreeDoc(ctxt->myDoc);
14589 ctxt->myDoc = NULL;
14590 }
14591 if (sax != NULL)
14592 ctxt->sax = oldsax;
14593 xmlFreeParserCtxt(ctxt);
14594
14595 return(ret);
14596 }
14597
14598 /**
14599 * xmlParseDoc:
14600 * @cur: a pointer to an array of xmlChar
14601 *
14602 * parse an XML in-memory document and build a tree.
14603 *
14604 * Returns the resulting document tree
14605 */
14606
14607 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14608 xmlParseDoc(const xmlChar *cur) {
14609 return(xmlSAXParseDoc(NULL, cur, 0));
14610 }
14611 #endif /* LIBXML_SAX1_ENABLED */
14612
14613 #ifdef LIBXML_LEGACY_ENABLED
14614 /************************************************************************
14615 * *
14616 * Specific function to keep track of entities references *
14617 * and used by the XSLT debugger *
14618 * *
14619 ************************************************************************/
14620
/*
 * Callback invoked by xmlAddEntityReference(); installed through
 * xmlSetEntityReferenceFunc(). NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14622
14623 /**
14624 * xmlAddEntityReference:
14625 * @ent : A valid entity
14626 * @firstNode : A valid first node for children of entity
14627 * @lastNode : A valid last node of children entity
14628 *
14629 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14630 */
14631 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14632 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14633 xmlNodePtr lastNode)
14634 {
14635 if (xmlEntityRefFunc != NULL) {
14636 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14637 }
14638 }
14639
14640
14641 /**
14642 * xmlSetEntityReferenceFunc:
14643 * @func: A valid function
14644 *
14645 * Set the function to call call back when a xml reference has been made
14646 */
14647 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14648 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14649 {
14650 xmlEntityRefFunc = func;
14651 }
14652 #endif /* LIBXML_LEGACY_ENABLED */
14653
14654 /************************************************************************
14655 * *
14656 * Miscellaneous *
14657 * *
14658 ************************************************************************/
14659
14660 #ifdef LIBXML_XPATH_ENABLED
14661 #include <libxml/xpath.h>
14662 #endif
14663
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14666
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Runs the library's initialization routines and then sets the
 * xmlParserInitialized flag; while the flag is set, further calls
 * return immediately.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /*
     * Register xmlCleanupParser() to run at exit, but only while the
     * standard free() is installed as xmlFree.
     */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Take the global init mutex and test the flag again: another thread
     * may have completed the initialization since the check above.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Install the default generic error handler if none was set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library as initialized only once every step has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14715
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run (or was undone). */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() initializes again. */
    xmlParserInitialized = 0;
}
14761
14762 #if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14763 !defined(_WIN32)
14764 static void
14765 ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)14766 xmlDestructor(void) {
14767 /*
14768 * Calling custom deallocation functions in a destructor can cause
14769 * problems, for example with Nokogiri.
14770 */
14771 if (xmlFree == free)
14772 xmlCleanupParser();
14773 }
14774 #endif
14775
14776 /************************************************************************
14777 * *
14778 * New set (2.6.0) of simpler and more flexible APIs *
14779 * *
14780 ************************************************************************/
14781
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as one
 * statement, including inside an unbraced if/else; terminate each use
 * with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14793
14794 /**
14795 * xmlCtxtReset:
14796 * @ctxt: an XML parser context
14797 *
14798 * Reset a parser context
14799 */
14800 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14801 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14802 {
14803 xmlParserInputPtr input;
14804 xmlDictPtr dict;
14805
14806 if (ctxt == NULL)
14807 return;
14808
14809 dict = ctxt->dict;
14810
14811 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812 xmlFreeInputStream(input);
14813 }
14814 ctxt->inputNr = 0;
14815 ctxt->input = NULL;
14816
14817 ctxt->spaceNr = 0;
14818 if (ctxt->spaceTab != NULL) {
14819 ctxt->spaceTab[0] = -1;
14820 ctxt->space = &ctxt->spaceTab[0];
14821 } else {
14822 ctxt->space = NULL;
14823 }
14824
14825
14826 ctxt->nodeNr = 0;
14827 ctxt->node = NULL;
14828
14829 ctxt->nameNr = 0;
14830 ctxt->name = NULL;
14831
14832 ctxt->nsNr = 0;
14833
14834 DICT_FREE(ctxt->version);
14835 ctxt->version = NULL;
14836 DICT_FREE(ctxt->encoding);
14837 ctxt->encoding = NULL;
14838 DICT_FREE(ctxt->directory);
14839 ctxt->directory = NULL;
14840 DICT_FREE(ctxt->extSubURI);
14841 ctxt->extSubURI = NULL;
14842 DICT_FREE(ctxt->extSubSystem);
14843 ctxt->extSubSystem = NULL;
14844 if (ctxt->myDoc != NULL)
14845 xmlFreeDoc(ctxt->myDoc);
14846 ctxt->myDoc = NULL;
14847
14848 ctxt->standalone = -1;
14849 ctxt->hasExternalSubset = 0;
14850 ctxt->hasPErefs = 0;
14851 ctxt->html = 0;
14852 ctxt->external = 0;
14853 ctxt->instate = XML_PARSER_START;
14854 ctxt->token = 0;
14855
14856 ctxt->wellFormed = 1;
14857 ctxt->nsWellFormed = 1;
14858 ctxt->disableSAX = 0;
14859 ctxt->valid = 1;
14860 #if 0
14861 ctxt->vctxt.userData = ctxt;
14862 ctxt->vctxt.error = xmlParserValidityError;
14863 ctxt->vctxt.warning = xmlParserValidityWarning;
14864 #endif
14865 ctxt->record_info = 0;
14866 ctxt->checkIndex = 0;
14867 ctxt->inSubset = 0;
14868 ctxt->errNo = XML_ERR_OK;
14869 ctxt->depth = 0;
14870 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871 ctxt->catalogs = NULL;
14872 ctxt->nbentities = 0;
14873 ctxt->sizeentities = 0;
14874 ctxt->sizeentcopy = 0;
14875 xmlInitNodeInfoSeq(&ctxt->node_seq);
14876
14877 if (ctxt->attsDefault != NULL) {
14878 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14879 ctxt->attsDefault = NULL;
14880 }
14881 if (ctxt->attsSpecial != NULL) {
14882 xmlHashFree(ctxt->attsSpecial, NULL);
14883 ctxt->attsSpecial = NULL;
14884 }
14885
14886 #ifdef LIBXML_CATALOG_ENABLED
14887 if (ctxt->catalogs != NULL)
14888 xmlCatalogFreeLocal(ctxt->catalogs);
14889 #endif
14890 if (ctxt->lastError.code != XML_ERR_OK)
14891 xmlResetError(&ctxt->lastError);
14892 }
14893
14894 /**
14895 * xmlCtxtResetPush:
14896 * @ctxt: an XML parser context
14897 * @chunk: a pointer to an array of chars
14898 * @size: number of chars in the array
14899 * @filename: an optional file name or URI
14900 * @encoding: the document encoding, or NULL
14901 *
14902 * Reset a push parser context
14903 *
14904 * Returns 0 in case of success and 1 in case of error
14905 */
14906 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908 int size, const char *filename, const char *encoding)
14909 {
14910 xmlParserInputPtr inputStream;
14911 xmlParserInputBufferPtr buf;
14912 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913
14914 if (ctxt == NULL)
14915 return(1);
14916
14917 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919
14920 buf = xmlAllocParserInputBuffer(enc);
14921 if (buf == NULL)
14922 return(1);
14923
14924 if (ctxt == NULL) {
14925 xmlFreeParserInputBuffer(buf);
14926 return(1);
14927 }
14928
14929 xmlCtxtReset(ctxt);
14930
14931 if (filename == NULL) {
14932 ctxt->directory = NULL;
14933 } else {
14934 ctxt->directory = xmlParserGetDirectory(filename);
14935 }
14936
14937 inputStream = xmlNewInputStream(ctxt);
14938 if (inputStream == NULL) {
14939 xmlFreeParserInputBuffer(buf);
14940 return(1);
14941 }
14942
14943 if (filename == NULL)
14944 inputStream->filename = NULL;
14945 else
14946 inputStream->filename = (char *)
14947 xmlCanonicPath((const xmlChar *) filename);
14948 inputStream->buf = buf;
14949 xmlBufResetInput(buf->buffer, inputStream);
14950
14951 inputPush(ctxt, inputStream);
14952
14953 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14954 (ctxt->input->buf != NULL)) {
14955 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14956 size_t cur = ctxt->input->cur - ctxt->input->base;
14957
14958 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14959
14960 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14961 #ifdef DEBUG_PUSH
14962 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14963 #endif
14964 }
14965
14966 if (encoding != NULL) {
14967 xmlCharEncodingHandlerPtr hdlr;
14968
14969 if (ctxt->encoding != NULL)
14970 xmlFree((xmlChar *) ctxt->encoding);
14971 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14972
14973 hdlr = xmlFindCharEncodingHandler(encoding);
14974 if (hdlr != NULL) {
14975 xmlSwitchToEncoding(ctxt, hdlr);
14976 } else {
14977 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14978 "Unsupported encoding %s\n", BAD_CAST encoding);
14979 }
14980 } else if (enc != XML_CHAR_ENCODING_NONE) {
14981 xmlSwitchEncoding(ctxt, enc);
14982 }
14983
14984 return(0);
14985 }
14986
14987
14988 /**
14989 * xmlCtxtUseOptionsInternal:
14990 * @ctxt: an XML parser context
14991 * @options: a combination of xmlParserOption
14992 * @encoding: the user provided encoding to use
14993 *
14994 * Applies the options to the parser context
14995 *
14996 * Returns 0 in case of success, the set of unknown or unimplemented options
14997 * in case of error.
14998 */
14999 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15000 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15001 {
15002 if (ctxt == NULL)
15003 return(-1);
15004 if (encoding != NULL) {
15005 if (ctxt->encoding != NULL)
15006 xmlFree((xmlChar *) ctxt->encoding);
15007 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15008 }
15009 if (options & XML_PARSE_RECOVER) {
15010 ctxt->recovery = 1;
15011 options -= XML_PARSE_RECOVER;
15012 ctxt->options |= XML_PARSE_RECOVER;
15013 } else
15014 ctxt->recovery = 0;
15015 if (options & XML_PARSE_DTDLOAD) {
15016 ctxt->loadsubset = XML_DETECT_IDS;
15017 options -= XML_PARSE_DTDLOAD;
15018 ctxt->options |= XML_PARSE_DTDLOAD;
15019 } else
15020 ctxt->loadsubset = 0;
15021 if (options & XML_PARSE_DTDATTR) {
15022 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15023 options -= XML_PARSE_DTDATTR;
15024 ctxt->options |= XML_PARSE_DTDATTR;
15025 }
15026 if (options & XML_PARSE_NOENT) {
15027 ctxt->replaceEntities = 1;
15028 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15029 options -= XML_PARSE_NOENT;
15030 ctxt->options |= XML_PARSE_NOENT;
15031 } else
15032 ctxt->replaceEntities = 0;
15033 if (options & XML_PARSE_PEDANTIC) {
15034 ctxt->pedantic = 1;
15035 options -= XML_PARSE_PEDANTIC;
15036 ctxt->options |= XML_PARSE_PEDANTIC;
15037 } else
15038 ctxt->pedantic = 0;
15039 if (options & XML_PARSE_NOBLANKS) {
15040 ctxt->keepBlanks = 0;
15041 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15042 options -= XML_PARSE_NOBLANKS;
15043 ctxt->options |= XML_PARSE_NOBLANKS;
15044 } else
15045 ctxt->keepBlanks = 1;
15046 if (options & XML_PARSE_DTDVALID) {
15047 ctxt->validate = 1;
15048 if (options & XML_PARSE_NOWARNING)
15049 ctxt->vctxt.warning = NULL;
15050 if (options & XML_PARSE_NOERROR)
15051 ctxt->vctxt.error = NULL;
15052 options -= XML_PARSE_DTDVALID;
15053 ctxt->options |= XML_PARSE_DTDVALID;
15054 } else
15055 ctxt->validate = 0;
15056 if (options & XML_PARSE_NOWARNING) {
15057 ctxt->sax->warning = NULL;
15058 options -= XML_PARSE_NOWARNING;
15059 }
15060 if (options & XML_PARSE_NOERROR) {
15061 ctxt->sax->error = NULL;
15062 ctxt->sax->fatalError = NULL;
15063 options -= XML_PARSE_NOERROR;
15064 }
15065 #ifdef LIBXML_SAX1_ENABLED
15066 if (options & XML_PARSE_SAX1) {
15067 ctxt->sax->startElement = xmlSAX2StartElement;
15068 ctxt->sax->endElement = xmlSAX2EndElement;
15069 ctxt->sax->startElementNs = NULL;
15070 ctxt->sax->endElementNs = NULL;
15071 ctxt->sax->initialized = 1;
15072 options -= XML_PARSE_SAX1;
15073 ctxt->options |= XML_PARSE_SAX1;
15074 }
15075 #endif /* LIBXML_SAX1_ENABLED */
15076 if (options & XML_PARSE_NODICT) {
15077 ctxt->dictNames = 0;
15078 options -= XML_PARSE_NODICT;
15079 ctxt->options |= XML_PARSE_NODICT;
15080 } else {
15081 ctxt->dictNames = 1;
15082 }
15083 if (options & XML_PARSE_NOCDATA) {
15084 ctxt->sax->cdataBlock = NULL;
15085 options -= XML_PARSE_NOCDATA;
15086 ctxt->options |= XML_PARSE_NOCDATA;
15087 }
15088 if (options & XML_PARSE_NSCLEAN) {
15089 ctxt->options |= XML_PARSE_NSCLEAN;
15090 options -= XML_PARSE_NSCLEAN;
15091 }
15092 if (options & XML_PARSE_NONET) {
15093 ctxt->options |= XML_PARSE_NONET;
15094 options -= XML_PARSE_NONET;
15095 }
15096 if (options & XML_PARSE_COMPACT) {
15097 ctxt->options |= XML_PARSE_COMPACT;
15098 options -= XML_PARSE_COMPACT;
15099 }
15100 if (options & XML_PARSE_OLD10) {
15101 ctxt->options |= XML_PARSE_OLD10;
15102 options -= XML_PARSE_OLD10;
15103 }
15104 if (options & XML_PARSE_NOBASEFIX) {
15105 ctxt->options |= XML_PARSE_NOBASEFIX;
15106 options -= XML_PARSE_NOBASEFIX;
15107 }
15108 if (options & XML_PARSE_HUGE) {
15109 ctxt->options |= XML_PARSE_HUGE;
15110 options -= XML_PARSE_HUGE;
15111 if (ctxt->dict != NULL)
15112 xmlDictSetLimit(ctxt->dict, 0);
15113 }
15114 if (options & XML_PARSE_OLDSAX) {
15115 ctxt->options |= XML_PARSE_OLDSAX;
15116 options -= XML_PARSE_OLDSAX;
15117 }
15118 if (options & XML_PARSE_IGNORE_ENC) {
15119 ctxt->options |= XML_PARSE_IGNORE_ENC;
15120 options -= XML_PARSE_IGNORE_ENC;
15121 }
15122 if (options & XML_PARSE_BIG_LINES) {
15123 ctxt->options |= XML_PARSE_BIG_LINES;
15124 options -= XML_PARSE_BIG_LINES;
15125 }
15126 ctxt->linenumbers = 1;
15127 return (options);
15128 }
15129
15130 /**
15131 * xmlCtxtUseOptions:
15132 * @ctxt: an XML parser context
15133 * @options: a combination of xmlParserOption
15134 *
15135 * Applies the options to the parser context
15136 *
15137 * Returns 0 in case of success, the set of unknown or unimplemented options
15138 * in case of error.
15139 */
15140 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15141 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15142 {
15143 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15144 }
15145
15146 /**
15147 * xmlDoRead:
15148 * @ctxt: an XML parser context
15149 * @URL: the base URL to use for the document
15150 * @encoding: the document encoding, or NULL
15151 * @options: a combination of xmlParserOption
15152 * @reuse: keep the context for reuse
15153 *
15154 * Common front-end for the xmlRead functions
15155 *
15156 * Returns the resulting document tree or NULL
15157 */
15158 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15159 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15160 int options, int reuse)
15161 {
15162 xmlDocPtr ret;
15163
15164 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15165 if (encoding != NULL) {
15166 xmlCharEncodingHandlerPtr hdlr;
15167
15168 hdlr = xmlFindCharEncodingHandler(encoding);
15169 if (hdlr != NULL)
15170 xmlSwitchToEncoding(ctxt, hdlr);
15171 }
15172 if ((URL != NULL) && (ctxt->input != NULL) &&
15173 (ctxt->input->filename == NULL))
15174 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15175 xmlParseDocument(ctxt);
15176 if ((ctxt->wellFormed) || ctxt->recovery)
15177 ret = ctxt->myDoc;
15178 else {
15179 ret = NULL;
15180 if (ctxt->myDoc != NULL) {
15181 xmlFreeDoc(ctxt->myDoc);
15182 }
15183 }
15184 ctxt->myDoc = NULL;
15185 if (!reuse) {
15186 xmlFreeParserCtxt(ctxt);
15187 }
15188
15189 return (ret);
15190 }
15191
15192 /**
15193 * xmlReadDoc:
15194 * @cur: a pointer to a zero terminated string
15195 * @URL: the base URL to use for the document
15196 * @encoding: the document encoding, or NULL
15197 * @options: a combination of xmlParserOption
15198 *
15199 * parse an XML in-memory document and build a tree.
15200 *
15201 * Returns the resulting document tree
15202 */
15203 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15204 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15205 {
15206 xmlParserCtxtPtr ctxt;
15207
15208 if (cur == NULL)
15209 return (NULL);
15210 xmlInitParser();
15211
15212 ctxt = xmlCreateDocParserCtxt(cur);
15213 if (ctxt == NULL)
15214 return (NULL);
15215 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15216 }
15217
15218 /**
15219 * xmlReadFile:
15220 * @filename: a file or URL
15221 * @encoding: the document encoding, or NULL
15222 * @options: a combination of xmlParserOption
15223 *
15224 * parse an XML file from the filesystem or the network.
15225 *
15226 * Returns the resulting document tree
15227 */
15228 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15229 xmlReadFile(const char *filename, const char *encoding, int options)
15230 {
15231 xmlParserCtxtPtr ctxt;
15232
15233 xmlInitParser();
15234 ctxt = xmlCreateURLParserCtxt(filename, options);
15235 if (ctxt == NULL)
15236 return (NULL);
15237 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15238 }
15239
15240 /**
15241 * xmlReadMemory:
15242 * @buffer: a pointer to a char array
15243 * @size: the size of the array
15244 * @URL: the base URL to use for the document
15245 * @encoding: the document encoding, or NULL
15246 * @options: a combination of xmlParserOption
15247 *
15248 * parse an XML in-memory document and build a tree.
15249 *
15250 * Returns the resulting document tree
15251 */
15252 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15253 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15254 {
15255 xmlParserCtxtPtr ctxt;
15256
15257 xmlInitParser();
15258 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15259 if (ctxt == NULL)
15260 return (NULL);
15261 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15262 }
15263
15264 /**
15265 * xmlReadFd:
15266 * @fd: an open file descriptor
15267 * @URL: the base URL to use for the document
15268 * @encoding: the document encoding, or NULL
15269 * @options: a combination of xmlParserOption
15270 *
15271 * parse an XML from a file descriptor and build a tree.
15272 * NOTE that the file descriptor will not be closed when the
15273 * reader is closed or reset.
15274 *
15275 * Returns the resulting document tree
15276 */
15277 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15278 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15279 {
15280 xmlParserCtxtPtr ctxt;
15281 xmlParserInputBufferPtr input;
15282 xmlParserInputPtr stream;
15283
15284 if (fd < 0)
15285 return (NULL);
15286 xmlInitParser();
15287
15288 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15289 if (input == NULL)
15290 return (NULL);
15291 input->closecallback = NULL;
15292 ctxt = xmlNewParserCtxt();
15293 if (ctxt == NULL) {
15294 xmlFreeParserInputBuffer(input);
15295 return (NULL);
15296 }
15297 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15298 if (stream == NULL) {
15299 xmlFreeParserInputBuffer(input);
15300 xmlFreeParserCtxt(ctxt);
15301 return (NULL);
15302 }
15303 inputPush(ctxt, stream);
15304 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305 }
15306
15307 /**
15308 * xmlReadIO:
15309 * @ioread: an I/O read function
15310 * @ioclose: an I/O close function
15311 * @ioctx: an I/O handler
15312 * @URL: the base URL to use for the document
15313 * @encoding: the document encoding, or NULL
15314 * @options: a combination of xmlParserOption
15315 *
15316 * parse an XML document from I/O functions and source and build a tree.
15317 *
15318 * Returns the resulting document tree
15319 */
15320 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15321 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15322 void *ioctx, const char *URL, const char *encoding, int options)
15323 {
15324 xmlParserCtxtPtr ctxt;
15325 xmlParserInputBufferPtr input;
15326 xmlParserInputPtr stream;
15327
15328 if (ioread == NULL)
15329 return (NULL);
15330 xmlInitParser();
15331
15332 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15333 XML_CHAR_ENCODING_NONE);
15334 if (input == NULL) {
15335 if (ioclose != NULL)
15336 ioclose(ioctx);
15337 return (NULL);
15338 }
15339 ctxt = xmlNewParserCtxt();
15340 if (ctxt == NULL) {
15341 xmlFreeParserInputBuffer(input);
15342 return (NULL);
15343 }
15344 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15345 if (stream == NULL) {
15346 xmlFreeParserInputBuffer(input);
15347 xmlFreeParserCtxt(ctxt);
15348 return (NULL);
15349 }
15350 inputPush(ctxt, stream);
15351 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15352 }
15353
15354 /**
15355 * xmlCtxtReadDoc:
15356 * @ctxt: an XML parser context
15357 * @cur: a pointer to a zero terminated string
15358 * @URL: the base URL to use for the document
15359 * @encoding: the document encoding, or NULL
15360 * @options: a combination of xmlParserOption
15361 *
15362 * parse an XML in-memory document and build a tree.
15363 * This reuses the existing @ctxt parser context
15364 *
15365 * Returns the resulting document tree
15366 */
15367 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15368 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15369 const char *URL, const char *encoding, int options)
15370 {
15371 if (cur == NULL)
15372 return (NULL);
15373 return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15374 encoding, options));
15375 }
15376
15377 /**
15378 * xmlCtxtReadFile:
15379 * @ctxt: an XML parser context
15380 * @filename: a file or URL
15381 * @encoding: the document encoding, or NULL
15382 * @options: a combination of xmlParserOption
15383 *
15384 * parse an XML file from the filesystem or the network.
15385 * This reuses the existing @ctxt parser context
15386 *
15387 * Returns the resulting document tree
15388 */
15389 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15390 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15391 const char *encoding, int options)
15392 {
15393 xmlParserInputPtr stream;
15394
15395 if (filename == NULL)
15396 return (NULL);
15397 if (ctxt == NULL)
15398 return (NULL);
15399 xmlInitParser();
15400
15401 xmlCtxtReset(ctxt);
15402
15403 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15404 if (stream == NULL) {
15405 return (NULL);
15406 }
15407 inputPush(ctxt, stream);
15408 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15409 }
15410
15411 /**
15412 * xmlCtxtReadMemory:
15413 * @ctxt: an XML parser context
15414 * @buffer: a pointer to a char array
15415 * @size: the size of the array
15416 * @URL: the base URL to use for the document
15417 * @encoding: the document encoding, or NULL
15418 * @options: a combination of xmlParserOption
15419 *
15420 * parse an XML in-memory document and build a tree.
15421 * This reuses the existing @ctxt parser context
15422 *
15423 * Returns the resulting document tree
15424 */
15425 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15426 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15427 const char *URL, const char *encoding, int options)
15428 {
15429 xmlParserInputBufferPtr input;
15430 xmlParserInputPtr stream;
15431
15432 if (ctxt == NULL)
15433 return (NULL);
15434 if (buffer == NULL)
15435 return (NULL);
15436 xmlInitParser();
15437
15438 xmlCtxtReset(ctxt);
15439
15440 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15441 if (input == NULL) {
15442 return(NULL);
15443 }
15444
15445 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15446 if (stream == NULL) {
15447 xmlFreeParserInputBuffer(input);
15448 return(NULL);
15449 }
15450
15451 inputPush(ctxt, stream);
15452 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15453 }
15454
15455 /**
15456 * xmlCtxtReadFd:
15457 * @ctxt: an XML parser context
15458 * @fd: an open file descriptor
15459 * @URL: the base URL to use for the document
15460 * @encoding: the document encoding, or NULL
15461 * @options: a combination of xmlParserOption
15462 *
15463 * parse an XML from a file descriptor and build a tree.
15464 * This reuses the existing @ctxt parser context
15465 * NOTE that the file descriptor will not be closed when the
15466 * reader is closed or reset.
15467 *
15468 * Returns the resulting document tree
15469 */
15470 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15471 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15472 const char *URL, const char *encoding, int options)
15473 {
15474 xmlParserInputBufferPtr input;
15475 xmlParserInputPtr stream;
15476
15477 if (fd < 0)
15478 return (NULL);
15479 if (ctxt == NULL)
15480 return (NULL);
15481 xmlInitParser();
15482
15483 xmlCtxtReset(ctxt);
15484
15485
15486 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15487 if (input == NULL)
15488 return (NULL);
15489 input->closecallback = NULL;
15490 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15491 if (stream == NULL) {
15492 xmlFreeParserInputBuffer(input);
15493 return (NULL);
15494 }
15495 inputPush(ctxt, stream);
15496 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15497 }
15498
15499 /**
15500 * xmlCtxtReadIO:
15501 * @ctxt: an XML parser context
15502 * @ioread: an I/O read function
15503 * @ioclose: an I/O close function
15504 * @ioctx: an I/O handler
15505 * @URL: the base URL to use for the document
15506 * @encoding: the document encoding, or NULL
15507 * @options: a combination of xmlParserOption
15508 *
15509 * parse an XML document from I/O functions and source and build a tree.
15510 * This reuses the existing @ctxt parser context
15511 *
15512 * Returns the resulting document tree
15513 */
15514 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15515 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15516 xmlInputCloseCallback ioclose, void *ioctx,
15517 const char *URL,
15518 const char *encoding, int options)
15519 {
15520 xmlParserInputBufferPtr input;
15521 xmlParserInputPtr stream;
15522
15523 if (ioread == NULL)
15524 return (NULL);
15525 if (ctxt == NULL)
15526 return (NULL);
15527 xmlInitParser();
15528
15529 xmlCtxtReset(ctxt);
15530
15531 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15532 XML_CHAR_ENCODING_NONE);
15533 if (input == NULL) {
15534 if (ioclose != NULL)
15535 ioclose(ioctx);
15536 return (NULL);
15537 }
15538 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15539 if (stream == NULL) {
15540 xmlFreeParserInputBuffer(input);
15541 return (NULL);
15542 }
15543 inputPush(ctxt, stream);
15544 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15545 }
15546
15547