xref: /reactos/sdk/lib/3rdparty/libxml2/parser.c (revision c0027d11)
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implemented either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/xmlmemory.h>
55 #include <libxml/threads.h>
56 #include <libxml/globals.h>
57 #include <libxml/tree.h>
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <libxml/valid.h>
61 #include <libxml/entities.h>
62 #include <libxml/xmlerror.h>
63 #include <libxml/encoding.h>
64 #include <libxml/xmlIO.h>
65 #include <libxml/uri.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 #ifdef LIBXML_SCHEMAS_ENABLED
70 #include <libxml/xmlschemastypes.h>
71 #include <libxml/relaxng.h>
72 #endif
73 
74 #include "buf.h"
75 #include "enc.h"
76 
77 struct _xmlStartTag {
78     const xmlChar *prefix;
79     const xmlChar *URI;
80     int line;
81     int nsNr;
82 };
83 
84 static void
85 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86 
87 static xmlParserCtxtPtr
88 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
90 
91 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92 
93 static int
94 xmlParseElementStart(xmlParserCtxtPtr ctxt);
95 
96 static void
97 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
98 
99 /************************************************************************
100  *									*
101  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
102  *									*
103  ************************************************************************/
104 
/* Arbitrary length limit, see XML_PARSE_HUGE. */
#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * Entity thresholds. xmlParserEntityCheck() never flags a @size below
 * XML_PARSER_BIG_ENTITY.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you
 *    have an exponential behaviour. A value of 10 corresponds to at least
 *    3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
117 
118 /*
119  * xmlParserEntityCheck
120  *
121  * Function to check non-linear entity expansion behaviour
122  * This is here to detect and stop exponential linear entity expansion
123  * This is not a limitation of the parser but a safety
124  * boundary feature. It can be disabled with the XML_PARSE_HUGE
125  * parser option.
126  */
127 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)128 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
129                      xmlEntityPtr ent, size_t replacement)
130 {
131     size_t consumed = 0;
132     int i;
133 
134     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135         return (0);
136     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137         return (1);
138 
139     /*
140      * This may look absurd but is needed to detect
141      * entities problems
142      */
143     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144 	(ent->content != NULL) && (ent->checked == 0) &&
145 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146 	unsigned long oldnbent = ctxt->nbentities, diff;
147 	xmlChar *rep;
148 
149 	ent->checked = 1;
150 
151         ++ctxt->depth;
152 	rep = xmlStringDecodeEntities(ctxt, ent->content,
153 				  XML_SUBSTITUTE_REF, 0, 0, 0);
154         --ctxt->depth;
155 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156 	    ent->content[0] = 0;
157 	}
158 
159         diff = ctxt->nbentities - oldnbent + 1;
160         if (diff > INT_MAX / 2)
161             diff = INT_MAX / 2;
162 	ent->checked = diff * 2;
163 	if (rep != NULL) {
164 	    if (xmlStrchr(rep, '<'))
165 		ent->checked |= 1;
166 	    xmlFree(rep);
167 	    rep = NULL;
168 	}
169     }
170 
171     /*
172      * Prevent entity exponential check, not just replacement while
173      * parsing the DTD
174      * The check is potentially costly so do that only once in a thousand
175      */
176     if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177         (ctxt->nbentities % 1024 == 0)) {
178 	for (i = 0;i < ctxt->inputNr;i++) {
179 	    consumed += ctxt->inputTab[i]->consumed +
180 	               (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
181 	}
182 	if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
183 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
184 	    ctxt->instate = XML_PARSER_EOF;
185 	    return (1);
186 	}
187 	consumed = 0;
188     }
189 
190 
191 
192     if (replacement != 0) {
193 	if (replacement < XML_MAX_TEXT_LENGTH)
194 	    return(0);
195 
196         /*
197 	 * If the volume of entity copy reaches 10 times the
198 	 * amount of parsed data and over the large text threshold
199 	 * then that's very likely to be an abuse.
200 	 */
201         if (ctxt->input != NULL) {
202 	    consumed = ctxt->input->consumed +
203 	               (ctxt->input->cur - ctxt->input->base);
204 	}
205         consumed += ctxt->sizeentities;
206 
207         if (replacement < XML_PARSER_NON_LINEAR * consumed)
208 	    return(0);
209     } else if (size != 0) {
210         /*
211          * Do the check based on the replacement size of the entity
212          */
213         if (size < XML_PARSER_BIG_ENTITY)
214 	    return(0);
215 
216         /*
217          * A limit on the amount of text data reasonably used
218          */
219         if (ctxt->input != NULL) {
220             consumed = ctxt->input->consumed +
221                 (ctxt->input->cur - ctxt->input->base);
222         }
223         consumed += ctxt->sizeentities;
224 
225         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
226 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
227             return (0);
228     } else if (ent != NULL) {
229         /*
230          * use the number of parsed entities in the replacement
231          */
232         size = ent->checked / 2;
233 
234         /*
235          * The amount of data parsed counting entities size only once
236          */
237         if (ctxt->input != NULL) {
238             consumed = ctxt->input->consumed +
239                 (ctxt->input->cur - ctxt->input->base);
240         }
241         consumed += ctxt->sizeentities;
242 
243         /*
244          * Check the density of entities for the amount of data
245 	 * knowing an entity reference will take at least 3 bytes
246          */
247         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
248             return (0);
249     } else {
250         /*
251          * strange we got no data for checking
252          */
253 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
254 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
255 	    (ctxt->nbentities <= 10000))
256 	    return (0);
257     }
258     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
259     return (1);
260 }
261 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. This is a safety boundary feature rather than a limitation of
 * the parser, and it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256;
271 
272 
273 
#define SAX2 1

/* Buffer sizes. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
289 
/*
 * NULL-terminated list of the XML prefixed PI names allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
299 
300 
301 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
302 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
303                                               const xmlChar **str);
304 
305 static xmlParserErrors
306 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
307 	              xmlSAXHandlerPtr sax,
308 		      void *user_data, int depth, const xmlChar *URL,
309 		      const xmlChar *ID, xmlNodePtr *list);
310 
311 static int
312 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
313                           const char *encoding);
314 #ifdef LIBXML_LEGACY_ENABLED
315 static void
316 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317                       xmlNodePtr lastNode);
318 #endif /* LIBXML_LEGACY_ENABLED */
319 
320 static xmlParserErrors
321 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
322 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
323 
324 static int
325 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
326 
327 /************************************************************************
328  *									*
329  *		Some factorized error routines				*
330  *									*
331  ************************************************************************/
332 
333 /**
334  * xmlErrAttributeDup:
335  * @ctxt:  an XML parser context
336  * @prefix:  the attribute prefix
337  * @localname:  the attribute localname
338  *
339  * Handle a redefinition of attribute error
340  */
341 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)342 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
343                    const xmlChar * localname)
344 {
345     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346         (ctxt->instate == XML_PARSER_EOF))
347 	return;
348     if (ctxt != NULL)
349 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
350 
351     if (prefix == NULL)
352         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
353                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
354                         (const char *) localname, NULL, NULL, 0, 0,
355                         "Attribute %s redefined\n", localname);
356     else
357         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
358                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
359                         (const char *) prefix, (const char *) localname,
360                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361                         localname);
362     if (ctxt != NULL) {
363 	ctxt->wellFormed = 0;
364 	if (ctxt->recovery == 0)
365 	    ctxt->disableSAX = 1;
366     }
367 }
368 
369 /**
370  * xmlFatalErr:
371  * @ctxt:  an XML parser context
372  * @error:  the error number
373  * @extra:  extra information string
374  *
375  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
376  */
377 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)378 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
379 {
380     const char *errmsg;
381 
382     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383         (ctxt->instate == XML_PARSER_EOF))
384 	return;
385     switch (error) {
386         case XML_ERR_INVALID_HEX_CHARREF:
387             errmsg = "CharRef: invalid hexadecimal value";
388             break;
389         case XML_ERR_INVALID_DEC_CHARREF:
390             errmsg = "CharRef: invalid decimal value";
391             break;
392         case XML_ERR_INVALID_CHARREF:
393             errmsg = "CharRef: invalid value";
394             break;
395         case XML_ERR_INTERNAL_ERROR:
396             errmsg = "internal error";
397             break;
398         case XML_ERR_PEREF_AT_EOF:
399             errmsg = "PEReference at end of document";
400             break;
401         case XML_ERR_PEREF_IN_PROLOG:
402             errmsg = "PEReference in prolog";
403             break;
404         case XML_ERR_PEREF_IN_EPILOG:
405             errmsg = "PEReference in epilog";
406             break;
407         case XML_ERR_PEREF_NO_NAME:
408             errmsg = "PEReference: no name";
409             break;
410         case XML_ERR_PEREF_SEMICOL_MISSING:
411             errmsg = "PEReference: expecting ';'";
412             break;
413         case XML_ERR_ENTITY_LOOP:
414             errmsg = "Detected an entity reference loop";
415             break;
416         case XML_ERR_ENTITY_NOT_STARTED:
417             errmsg = "EntityValue: \" or ' expected";
418             break;
419         case XML_ERR_ENTITY_PE_INTERNAL:
420             errmsg = "PEReferences forbidden in internal subset";
421             break;
422         case XML_ERR_ENTITY_NOT_FINISHED:
423             errmsg = "EntityValue: \" or ' expected";
424             break;
425         case XML_ERR_ATTRIBUTE_NOT_STARTED:
426             errmsg = "AttValue: \" or ' expected";
427             break;
428         case XML_ERR_LT_IN_ATTRIBUTE:
429             errmsg = "Unescaped '<' not allowed in attributes values";
430             break;
431         case XML_ERR_LITERAL_NOT_STARTED:
432             errmsg = "SystemLiteral \" or ' expected";
433             break;
434         case XML_ERR_LITERAL_NOT_FINISHED:
435             errmsg = "Unfinished System or Public ID \" or ' expected";
436             break;
437         case XML_ERR_MISPLACED_CDATA_END:
438             errmsg = "Sequence ']]>' not allowed in content";
439             break;
440         case XML_ERR_URI_REQUIRED:
441             errmsg = "SYSTEM or PUBLIC, the URI is missing";
442             break;
443         case XML_ERR_PUBID_REQUIRED:
444             errmsg = "PUBLIC, the Public Identifier is missing";
445             break;
446         case XML_ERR_HYPHEN_IN_COMMENT:
447             errmsg = "Comment must not contain '--' (double-hyphen)";
448             break;
449         case XML_ERR_PI_NOT_STARTED:
450             errmsg = "xmlParsePI : no target name";
451             break;
452         case XML_ERR_RESERVED_XML_NAME:
453             errmsg = "Invalid PI name";
454             break;
455         case XML_ERR_NOTATION_NOT_STARTED:
456             errmsg = "NOTATION: Name expected here";
457             break;
458         case XML_ERR_NOTATION_NOT_FINISHED:
459             errmsg = "'>' required to close NOTATION declaration";
460             break;
461         case XML_ERR_VALUE_REQUIRED:
462             errmsg = "Entity value required";
463             break;
464         case XML_ERR_URI_FRAGMENT:
465             errmsg = "Fragment not allowed";
466             break;
467         case XML_ERR_ATTLIST_NOT_STARTED:
468             errmsg = "'(' required to start ATTLIST enumeration";
469             break;
470         case XML_ERR_NMTOKEN_REQUIRED:
471             errmsg = "NmToken expected in ATTLIST enumeration";
472             break;
473         case XML_ERR_ATTLIST_NOT_FINISHED:
474             errmsg = "')' required to finish ATTLIST enumeration";
475             break;
476         case XML_ERR_MIXED_NOT_STARTED:
477             errmsg = "MixedContentDecl : '|' or ')*' expected";
478             break;
479         case XML_ERR_PCDATA_REQUIRED:
480             errmsg = "MixedContentDecl : '#PCDATA' expected";
481             break;
482         case XML_ERR_ELEMCONTENT_NOT_STARTED:
483             errmsg = "ContentDecl : Name or '(' expected";
484             break;
485         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
486             errmsg = "ContentDecl : ',' '|' or ')' expected";
487             break;
488         case XML_ERR_PEREF_IN_INT_SUBSET:
489             errmsg =
490                 "PEReference: forbidden within markup decl in internal subset";
491             break;
492         case XML_ERR_GT_REQUIRED:
493             errmsg = "expected '>'";
494             break;
495         case XML_ERR_CONDSEC_INVALID:
496             errmsg = "XML conditional section '[' expected";
497             break;
498         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
499             errmsg = "Content error in the external subset";
500             break;
501         case XML_ERR_CONDSEC_INVALID_KEYWORD:
502             errmsg =
503                 "conditional section INCLUDE or IGNORE keyword expected";
504             break;
505         case XML_ERR_CONDSEC_NOT_FINISHED:
506             errmsg = "XML conditional section not closed";
507             break;
508         case XML_ERR_XMLDECL_NOT_STARTED:
509             errmsg = "Text declaration '<?xml' required";
510             break;
511         case XML_ERR_XMLDECL_NOT_FINISHED:
512             errmsg = "parsing XML declaration: '?>' expected";
513             break;
514         case XML_ERR_EXT_ENTITY_STANDALONE:
515             errmsg = "external parsed entities cannot be standalone";
516             break;
517         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
518             errmsg = "EntityRef: expecting ';'";
519             break;
520         case XML_ERR_DOCTYPE_NOT_FINISHED:
521             errmsg = "DOCTYPE improperly terminated";
522             break;
523         case XML_ERR_LTSLASH_REQUIRED:
524             errmsg = "EndTag: '</' not found";
525             break;
526         case XML_ERR_EQUAL_REQUIRED:
527             errmsg = "expected '='";
528             break;
529         case XML_ERR_STRING_NOT_CLOSED:
530             errmsg = "String not closed expecting \" or '";
531             break;
532         case XML_ERR_STRING_NOT_STARTED:
533             errmsg = "String not started expecting ' or \"";
534             break;
535         case XML_ERR_ENCODING_NAME:
536             errmsg = "Invalid XML encoding name";
537             break;
538         case XML_ERR_STANDALONE_VALUE:
539             errmsg = "standalone accepts only 'yes' or 'no'";
540             break;
541         case XML_ERR_DOCUMENT_EMPTY:
542             errmsg = "Document is empty";
543             break;
544         case XML_ERR_DOCUMENT_END:
545             errmsg = "Extra content at the end of the document";
546             break;
547         case XML_ERR_NOT_WELL_BALANCED:
548             errmsg = "chunk is not well balanced";
549             break;
550         case XML_ERR_EXTRA_CONTENT:
551             errmsg = "extra content at the end of well balanced chunk";
552             break;
553         case XML_ERR_VERSION_MISSING:
554             errmsg = "Malformed declaration expecting version";
555             break;
556         case XML_ERR_NAME_TOO_LONG:
557             errmsg = "Name too long";
558             break;
559 #if 0
560         case:
561             errmsg = "";
562             break;
563 #endif
564         default:
565             errmsg = "Unregistered error message";
566     }
567     if (ctxt != NULL)
568 	ctxt->errNo = error;
569     if (info == NULL) {
570         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572                         errmsg);
573     } else {
574         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576                         errmsg, info);
577     }
578     if (ctxt != NULL) {
579 	ctxt->wellFormed = 0;
580 	if (ctxt->recovery == 0)
581 	    ctxt->disableSAX = 1;
582     }
583 }
584 
585 /**
586  * xmlFatalErrMsg:
587  * @ctxt:  an XML parser context
588  * @error:  the error number
589  * @msg:  the error message
590  *
591  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592  */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)594 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595                const char *msg)
596 {
597     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598         (ctxt->instate == XML_PARSER_EOF))
599 	return;
600     if (ctxt != NULL)
601 	ctxt->errNo = error;
602     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604     if (ctxt != NULL) {
605 	ctxt->wellFormed = 0;
606 	if (ctxt->recovery == 0)
607 	    ctxt->disableSAX = 1;
608     }
609 }
610 
611 /**
612  * xmlWarningMsg:
613  * @ctxt:  an XML parser context
614  * @error:  the error number
615  * @msg:  the error message
616  * @str1:  extra data
617  * @str2:  extra data
618  *
619  * Handle a warning.
620  */
621 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)622 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623               const char *msg, const xmlChar *str1, const xmlChar *str2)
624 {
625     xmlStructuredErrorFunc schannel = NULL;
626 
627     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628         (ctxt->instate == XML_PARSER_EOF))
629 	return;
630     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631         (ctxt->sax->initialized == XML_SAX2_MAGIC))
632         schannel = ctxt->sax->serror;
633     if (ctxt != NULL) {
634         __xmlRaiseError(schannel,
635                     (ctxt->sax) ? ctxt->sax->warning : NULL,
636                     ctxt->userData,
637                     ctxt, NULL, XML_FROM_PARSER, error,
638                     XML_ERR_WARNING, NULL, 0,
639 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
640 		    msg, (const char *) str1, (const char *) str2);
641     } else {
642         __xmlRaiseError(schannel, NULL, NULL,
643                     ctxt, NULL, XML_FROM_PARSER, error,
644                     XML_ERR_WARNING, NULL, 0,
645 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
646 		    msg, (const char *) str1, (const char *) str2);
647     }
648 }
649 
650 /**
651  * xmlValidityError:
652  * @ctxt:  an XML parser context
653  * @error:  the error number
654  * @msg:  the error message
655  * @str1:  extra data
656  *
657  * Handle a validity error.
658  */
659 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)660 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661               const char *msg, const xmlChar *str1, const xmlChar *str2)
662 {
663     xmlStructuredErrorFunc schannel = NULL;
664 
665     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666         (ctxt->instate == XML_PARSER_EOF))
667 	return;
668     if (ctxt != NULL) {
669 	ctxt->errNo = error;
670 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671 	    schannel = ctxt->sax->serror;
672     }
673     if (ctxt != NULL) {
674         __xmlRaiseError(schannel,
675                     ctxt->vctxt.error, ctxt->vctxt.userData,
676                     ctxt, NULL, XML_FROM_DTD, error,
677                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
678 		    (const char *) str2, NULL, 0, 0,
679 		    msg, (const char *) str1, (const char *) str2);
680 	ctxt->valid = 0;
681     } else {
682         __xmlRaiseError(schannel, NULL, NULL,
683                     ctxt, NULL, XML_FROM_DTD, error,
684                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
685 		    (const char *) str2, NULL, 0, 0,
686 		    msg, (const char *) str1, (const char *) str2);
687     }
688 }
689 
690 /**
691  * xmlFatalErrMsgInt:
692  * @ctxt:  an XML parser context
693  * @error:  the error number
694  * @msg:  the error message
695  * @val:  an integer value
696  *
697  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
698  */
699 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)700 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701                   const char *msg, int val)
702 {
703     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704         (ctxt->instate == XML_PARSER_EOF))
705 	return;
706     if (ctxt != NULL)
707 	ctxt->errNo = error;
708     __xmlRaiseError(NULL, NULL, NULL,
709                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
710                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711     if (ctxt != NULL) {
712 	ctxt->wellFormed = 0;
713 	if (ctxt->recovery == 0)
714 	    ctxt->disableSAX = 1;
715     }
716 }
717 
718 /**
719  * xmlFatalErrMsgStrIntStr:
720  * @ctxt:  an XML parser context
721  * @error:  the error number
722  * @msg:  the error message
723  * @str1:  an string info
724  * @val:  an integer value
725  * @str2:  an string info
726  *
727  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
728  */
729 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)730 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731                   const char *msg, const xmlChar *str1, int val,
732 		  const xmlChar *str2)
733 {
734     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735         (ctxt->instate == XML_PARSER_EOF))
736 	return;
737     if (ctxt != NULL)
738 	ctxt->errNo = error;
739     __xmlRaiseError(NULL, NULL, NULL,
740                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
741                     NULL, 0, (const char *) str1, (const char *) str2,
742 		    NULL, val, 0, msg, str1, val, str2);
743     if (ctxt != NULL) {
744 	ctxt->wellFormed = 0;
745 	if (ctxt->recovery == 0)
746 	    ctxt->disableSAX = 1;
747     }
748 }
749 
750 /**
751  * xmlFatalErrMsgStr:
752  * @ctxt:  an XML parser context
753  * @error:  the error number
754  * @msg:  the error message
755  * @val:  a string value
756  *
757  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
758  */
759 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)760 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761                   const char *msg, const xmlChar * val)
762 {
763     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764         (ctxt->instate == XML_PARSER_EOF))
765 	return;
766     if (ctxt != NULL)
767 	ctxt->errNo = error;
768     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
769                     XML_FROM_PARSER, error, XML_ERR_FATAL,
770                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771                     val);
772     if (ctxt != NULL) {
773 	ctxt->wellFormed = 0;
774 	if (ctxt->recovery == 0)
775 	    ctxt->disableSAX = 1;
776     }
777 }
778 
779 /**
780  * xmlErrMsgStr:
781  * @ctxt:  an XML parser context
782  * @error:  the error number
783  * @msg:  the error message
784  * @val:  a string value
785  *
786  * Handle a non fatal parser error
787  */
788 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)789 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790                   const char *msg, const xmlChar * val)
791 {
792     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793         (ctxt->instate == XML_PARSER_EOF))
794 	return;
795     if (ctxt != NULL)
796 	ctxt->errNo = error;
797     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
798                     XML_FROM_PARSER, error, XML_ERR_ERROR,
799                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800                     val);
801 }
802 
803 /**
804  * xmlNsErr:
805  * @ctxt:  an XML parser context
806  * @error:  the error number
807  * @msg:  the message
808  * @info1:  extra information string
809  * @info2:  extra information string
810  *
811  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
812  */
813 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)814 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
815          const char *msg,
816          const xmlChar * info1, const xmlChar * info2,
817          const xmlChar * info3)
818 {
819     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820         (ctxt->instate == XML_PARSER_EOF))
821 	return;
822     if (ctxt != NULL)
823 	ctxt->errNo = error;
824     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
826                     (const char *) info2, (const char *) info3, 0, 0, msg,
827                     info1, info2, info3);
828     if (ctxt != NULL)
829 	ctxt->nsWellFormed = 0;
830 }
831 
832 /**
833  * xmlNsWarn
834  * @ctxt:  an XML parser context
835  * @error:  the error number
836  * @msg:  the message
837  * @info1:  extra information string
838  * @info2:  extra information string
839  *
840  * Handle a namespace warning error
841  */
842 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)843 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
844          const char *msg,
845          const xmlChar * info1, const xmlChar * info2,
846          const xmlChar * info3)
847 {
848     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849         (ctxt->instate == XML_PARSER_EOF))
850 	return;
851     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
853                     (const char *) info2, (const char *) info3, 0, 0, msg,
854                     info1, info2, info3);
855 }
856 
857 /************************************************************************
858  *									*
859  *		Library wide options					*
860  *									*
861  ************************************************************************/
862 
863 /**
864   * xmlHasFeature:
865   * @feature: the feature to be examined
866   *
867   * Examines if the library has been compiled with a given feature.
868   *
869   * Returns a non-zero value if the feature exist, otherwise zero.
870   * Returns zero (0) if the feature does not exist or an unknown
871   * unknown feature is requested, non-zero otherwise.
872   */
873 int
xmlHasFeature(xmlFeature feature)874 xmlHasFeature(xmlFeature feature)
875 {
876     switch (feature) {
877 	case XML_WITH_THREAD:
878 #ifdef LIBXML_THREAD_ENABLED
879 	    return(1);
880 #else
881 	    return(0);
882 #endif
883         case XML_WITH_TREE:
884 #ifdef LIBXML_TREE_ENABLED
885             return(1);
886 #else
887             return(0);
888 #endif
889         case XML_WITH_OUTPUT:
890 #ifdef LIBXML_OUTPUT_ENABLED
891             return(1);
892 #else
893             return(0);
894 #endif
895         case XML_WITH_PUSH:
896 #ifdef LIBXML_PUSH_ENABLED
897             return(1);
898 #else
899             return(0);
900 #endif
901         case XML_WITH_READER:
902 #ifdef LIBXML_READER_ENABLED
903             return(1);
904 #else
905             return(0);
906 #endif
907         case XML_WITH_PATTERN:
908 #ifdef LIBXML_PATTERN_ENABLED
909             return(1);
910 #else
911             return(0);
912 #endif
913         case XML_WITH_WRITER:
914 #ifdef LIBXML_WRITER_ENABLED
915             return(1);
916 #else
917             return(0);
918 #endif
919         case XML_WITH_SAX1:
920 #ifdef LIBXML_SAX1_ENABLED
921             return(1);
922 #else
923             return(0);
924 #endif
925         case XML_WITH_FTP:
926 #ifdef LIBXML_FTP_ENABLED
927             return(1);
928 #else
929             return(0);
930 #endif
931         case XML_WITH_HTTP:
932 #ifdef LIBXML_HTTP_ENABLED
933             return(1);
934 #else
935             return(0);
936 #endif
937         case XML_WITH_VALID:
938 #ifdef LIBXML_VALID_ENABLED
939             return(1);
940 #else
941             return(0);
942 #endif
943         case XML_WITH_HTML:
944 #ifdef LIBXML_HTML_ENABLED
945             return(1);
946 #else
947             return(0);
948 #endif
949         case XML_WITH_LEGACY:
950 #ifdef LIBXML_LEGACY_ENABLED
951             return(1);
952 #else
953             return(0);
954 #endif
955         case XML_WITH_C14N:
956 #ifdef LIBXML_C14N_ENABLED
957             return(1);
958 #else
959             return(0);
960 #endif
961         case XML_WITH_CATALOG:
962 #ifdef LIBXML_CATALOG_ENABLED
963             return(1);
964 #else
965             return(0);
966 #endif
967         case XML_WITH_XPATH:
968 #ifdef LIBXML_XPATH_ENABLED
969             return(1);
970 #else
971             return(0);
972 #endif
973         case XML_WITH_XPTR:
974 #ifdef LIBXML_XPTR_ENABLED
975             return(1);
976 #else
977             return(0);
978 #endif
979         case XML_WITH_XINCLUDE:
980 #ifdef LIBXML_XINCLUDE_ENABLED
981             return(1);
982 #else
983             return(0);
984 #endif
985         case XML_WITH_ICONV:
986 #ifdef LIBXML_ICONV_ENABLED
987             return(1);
988 #else
989             return(0);
990 #endif
991         case XML_WITH_ISO8859X:
992 #ifdef LIBXML_ISO8859X_ENABLED
993             return(1);
994 #else
995             return(0);
996 #endif
997         case XML_WITH_UNICODE:
998 #ifdef LIBXML_UNICODE_ENABLED
999             return(1);
1000 #else
1001             return(0);
1002 #endif
1003         case XML_WITH_REGEXP:
1004 #ifdef LIBXML_REGEXP_ENABLED
1005             return(1);
1006 #else
1007             return(0);
1008 #endif
1009         case XML_WITH_AUTOMATA:
1010 #ifdef LIBXML_AUTOMATA_ENABLED
1011             return(1);
1012 #else
1013             return(0);
1014 #endif
1015         case XML_WITH_EXPR:
1016 #ifdef LIBXML_EXPR_ENABLED
1017             return(1);
1018 #else
1019             return(0);
1020 #endif
1021         case XML_WITH_SCHEMAS:
1022 #ifdef LIBXML_SCHEMAS_ENABLED
1023             return(1);
1024 #else
1025             return(0);
1026 #endif
1027         case XML_WITH_SCHEMATRON:
1028 #ifdef LIBXML_SCHEMATRON_ENABLED
1029             return(1);
1030 #else
1031             return(0);
1032 #endif
1033         case XML_WITH_MODULES:
1034 #ifdef LIBXML_MODULES_ENABLED
1035             return(1);
1036 #else
1037             return(0);
1038 #endif
1039         case XML_WITH_DEBUG:
1040 #ifdef LIBXML_DEBUG_ENABLED
1041             return(1);
1042 #else
1043             return(0);
1044 #endif
1045         case XML_WITH_DEBUG_MEM:
1046 #ifdef DEBUG_MEMORY_LOCATION
1047             return(1);
1048 #else
1049             return(0);
1050 #endif
1051         case XML_WITH_DEBUG_RUN:
1052 #ifdef LIBXML_DEBUG_RUNTIME
1053             return(1);
1054 #else
1055             return(0);
1056 #endif
1057         case XML_WITH_ZLIB:
1058 #ifdef LIBXML_ZLIB_ENABLED
1059             return(1);
1060 #else
1061             return(0);
1062 #endif
1063         case XML_WITH_LZMA:
1064 #ifdef LIBXML_LZMA_ENABLED
1065             return(1);
1066 #else
1067             return(0);
1068 #endif
1069         case XML_WITH_ICU:
1070 #ifdef LIBXML_ICU_ENABLED
1071             return(1);
1072 #else
1073             return(0);
1074 #endif
1075         default:
1076 	    break;
1077      }
1078      return(0);
1079 }
1080 
1081 /************************************************************************
1082  *									*
1083  *		SAX2 defaulted attributes handling			*
1084  *									*
1085  ************************************************************************/
1086 
1087 /**
1088  * xmlDetectSAX2:
1089  * @ctxt:  an XML parser context
1090  *
1091  * Do the SAX2 detection and specific initialization
1092  */
1093 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1094 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1095     xmlSAXHandlerPtr sax;
1096 
1097     /* Avoid unused variable warning if features are disabled. */
1098     (void) sax;
1099 
1100     if (ctxt == NULL) return;
1101     sax = ctxt->sax;
1102 #ifdef LIBXML_SAX1_ENABLED
1103     if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1104         ((sax->startElementNs != NULL) ||
1105          (sax->endElementNs != NULL) ||
1106          ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107         ctxt->sax2 = 1;
1108 #else
1109     ctxt->sax2 = 1;
1110 #endif /* LIBXML_SAX1_ENABLED */
1111 
1112     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116 		(ctxt->str_xml_ns == NULL)) {
1117         xmlErrMemory(ctxt, NULL);
1118     }
1119 }
1120 
1121 typedef struct _xmlDefAttrs xmlDefAttrs;
1122 typedef xmlDefAttrs *xmlDefAttrsPtr;
1123 struct _xmlDefAttrs {
1124     int nbAttrs;	/* number of defaulted attributes on that element */
1125     int maxAttrs;       /* the size of the array */
1126 #if __STDC_VERSION__ >= 199901L
1127     /* Using a C99 flexible array member avoids UBSan errors. */
1128     const xmlChar *values[]; /* array of localname/prefix/values/external */
1129 #else
1130     const xmlChar *values[5];
1131 #endif
1132 };
1133 
1134 /**
1135  * xmlAttrNormalizeSpace:
1136  * @src: the source string
1137  * @dst: the target string
1138  *
1139  * Normalize the space in non CDATA attribute values:
1140  * If the attribute type is not CDATA, then the XML processor MUST further
1141  * process the normalized attribute value by discarding any leading and
1142  * trailing space (#x20) characters, and by replacing sequences of space
1143  * (#x20) characters by a single space (#x20) character.
1144  * Note that the size of dst need to be at least src, and if one doesn't need
1145  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1146  * passing src as dst is just fine.
1147  *
1148  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1149  *         is needed.
1150  */
1151 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1152 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1153 {
1154     if ((src == NULL) || (dst == NULL))
1155         return(NULL);
1156 
1157     while (*src == 0x20) src++;
1158     while (*src != 0) {
1159 	if (*src == 0x20) {
1160 	    while (*src == 0x20) src++;
1161 	    if (*src != 0)
1162 		*dst++ = 0x20;
1163 	} else {
1164 	    *dst++ = *src++;
1165 	}
1166     }
1167     *dst = 0;
1168     if (dst == src)
1169        return(NULL);
1170     return(dst);
1171 }
1172 
1173 /**
1174  * xmlAttrNormalizeSpace2:
1175  * @src: the source string
1176  *
1177  * Normalize the space in non CDATA attribute values, a slightly more complex
1178  * front end to avoid allocation problems when running on attribute values
1179  * coming from the input.
1180  *
1181  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1182  *         is needed.
1183  */
1184 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1185 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1186 {
1187     int i;
1188     int remove_head = 0;
1189     int need_realloc = 0;
1190     const xmlChar *cur;
1191 
1192     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193         return(NULL);
1194     i = *len;
1195     if (i <= 0)
1196         return(NULL);
1197 
1198     cur = src;
1199     while (*cur == 0x20) {
1200         cur++;
1201 	remove_head++;
1202     }
1203     while (*cur != 0) {
1204 	if (*cur == 0x20) {
1205 	    cur++;
1206 	    if ((*cur == 0x20) || (*cur == 0)) {
1207 	        need_realloc = 1;
1208 		break;
1209 	    }
1210 	} else
1211 	    cur++;
1212     }
1213     if (need_realloc) {
1214         xmlChar *ret;
1215 
1216 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217 	if (ret == NULL) {
1218 	    xmlErrMemory(ctxt, NULL);
1219 	    return(NULL);
1220 	}
1221 	xmlAttrNormalizeSpace(ret, ret);
1222 	*len = (int) strlen((const char *)ret);
1223         return(ret);
1224     } else if (remove_head) {
1225         *len -= remove_head;
1226         memmove(src, src + remove_head, 1 + *len);
1227 	return(src);
1228     }
1229     return(NULL);
1230 }
1231 
1232 /**
1233  * xmlAddDefAttrs:
1234  * @ctxt:  an XML parser context
1235  * @fullname:  the element fullname
1236  * @fullattr:  the attribute fullname
1237  * @value:  the attribute value
1238  *
1239  * Add a defaulted attribute for an element
1240  */
1241 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1242 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1243                const xmlChar *fullname,
1244                const xmlChar *fullattr,
1245                const xmlChar *value) {
1246     xmlDefAttrsPtr defaults;
1247     int len;
1248     const xmlChar *name;
1249     const xmlChar *prefix;
1250 
1251     /*
1252      * Allows to detect attribute redefinitions
1253      */
1254     if (ctxt->attsSpecial != NULL) {
1255         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256 	    return;
1257     }
1258 
1259     if (ctxt->attsDefault == NULL) {
1260         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261 	if (ctxt->attsDefault == NULL)
1262 	    goto mem_error;
1263     }
1264 
1265     /*
1266      * split the element name into prefix:localname , the string found
1267      * are within the DTD and then not associated to namespace names.
1268      */
1269     name = xmlSplitQName3(fullname, &len);
1270     if (name == NULL) {
1271         name = xmlDictLookup(ctxt->dict, fullname, -1);
1272 	prefix = NULL;
1273     } else {
1274         name = xmlDictLookup(ctxt->dict, name, -1);
1275 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1276     }
1277 
1278     /*
1279      * make sure there is some storage
1280      */
1281     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282     if (defaults == NULL) {
1283         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1284 	                   (4 * 5) * sizeof(const xmlChar *));
1285 	if (defaults == NULL)
1286 	    goto mem_error;
1287 	defaults->nbAttrs = 0;
1288 	defaults->maxAttrs = 4;
1289 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290 	                        defaults, NULL) < 0) {
1291 	    xmlFree(defaults);
1292 	    goto mem_error;
1293 	}
1294     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1295         xmlDefAttrsPtr temp;
1296 
1297         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1298 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299 	if (temp == NULL)
1300 	    goto mem_error;
1301 	defaults = temp;
1302 	defaults->maxAttrs *= 2;
1303 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304 	                        defaults, NULL) < 0) {
1305 	    xmlFree(defaults);
1306 	    goto mem_error;
1307 	}
1308     }
1309 
1310     /*
1311      * Split the element name into prefix:localname , the string found
1312      * are within the DTD and hen not associated to namespace names.
1313      */
1314     name = xmlSplitQName3(fullattr, &len);
1315     if (name == NULL) {
1316         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317 	prefix = NULL;
1318     } else {
1319         name = xmlDictLookup(ctxt->dict, name, -1);
1320 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321     }
1322 
1323     defaults->values[5 * defaults->nbAttrs] = name;
1324     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325     /* intern the string and precompute the end */
1326     len = xmlStrlen(value);
1327     value = xmlDictLookup(ctxt->dict, value, len);
1328     defaults->values[5 * defaults->nbAttrs + 2] = value;
1329     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330     if (ctxt->external)
1331         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332     else
1333         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334     defaults->nbAttrs++;
1335 
1336     return;
1337 
1338 mem_error:
1339     xmlErrMemory(ctxt, NULL);
1340     return;
1341 }
1342 
1343 /**
1344  * xmlAddSpecialAttr:
1345  * @ctxt:  an XML parser context
1346  * @fullname:  the element fullname
1347  * @fullattr:  the attribute fullname
1348  * @type:  the attribute type
1349  *
1350  * Register this attribute type
1351  */
1352 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1353 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1354 		  const xmlChar *fullname,
1355 		  const xmlChar *fullattr,
1356 		  int type)
1357 {
1358     if (ctxt->attsSpecial == NULL) {
1359         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360 	if (ctxt->attsSpecial == NULL)
1361 	    goto mem_error;
1362     }
1363 
1364     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365         return;
1366 
1367     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368                      (void *) (ptrdiff_t) type);
1369     return;
1370 
1371 mem_error:
1372     xmlErrMemory(ctxt, NULL);
1373     return;
1374 }
1375 
1376 /**
1377  * xmlCleanSpecialAttrCallback:
1378  *
1379  * Removes CDATA attributes from the special attribute table
1380  */
1381 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1382 xmlCleanSpecialAttrCallback(void *payload, void *data,
1383                             const xmlChar *fullname, const xmlChar *fullattr,
1384                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1385     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1386 
1387     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1389     }
1390 }
1391 
1392 /**
1393  * xmlCleanSpecialAttr:
1394  * @ctxt:  an XML parser context
1395  *
1396  * Trim the list of attributes defined to remove all those of type
1397  * CDATA as they are not special. This call should be done when finishing
1398  * to parse the DTD and before starting to parse the document root.
1399  */
1400 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1401 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1402 {
1403     if (ctxt->attsSpecial == NULL)
1404         return;
1405 
1406     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1407 
1408     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1409         xmlHashFree(ctxt->attsSpecial, NULL);
1410         ctxt->attsSpecial = NULL;
1411     }
1412     return;
1413 }
1414 
1415 /**
1416  * xmlCheckLanguageID:
1417  * @lang:  pointer to the string value
1418  *
1419  * Checks that the value conforms to the LanguageID production:
1420  *
1421  * NOTE: this is somewhat deprecated, those productions were removed from
1422  *       the XML Second edition.
1423  *
1424  * [33] LanguageID ::= Langcode ('-' Subcode)*
1425  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1426  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1427  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1428  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1429  * [38] Subcode ::= ([a-z] | [A-Z])+
1430  *
1431  * The current REC reference the successors of RFC 1766, currently 5646
1432  *
1433  * http://www.rfc-editor.org/rfc/rfc5646.txt
1434  * langtag       = language
1435  *                 ["-" script]
1436  *                 ["-" region]
1437  *                 *("-" variant)
1438  *                 *("-" extension)
1439  *                 ["-" privateuse]
1440  * language      = 2*3ALPHA            ; shortest ISO 639 code
1441  *                 ["-" extlang]       ; sometimes followed by
1442  *                                     ; extended language subtags
1443  *               / 4ALPHA              ; or reserved for future use
1444  *               / 5*8ALPHA            ; or registered language subtag
1445  *
1446  * extlang       = 3ALPHA              ; selected ISO 639 codes
1447  *                 *2("-" 3ALPHA)      ; permanently reserved
1448  *
1449  * script        = 4ALPHA              ; ISO 15924 code
1450  *
1451  * region        = 2ALPHA              ; ISO 3166-1 code
1452  *               / 3DIGIT              ; UN M.49 code
1453  *
1454  * variant       = 5*8alphanum         ; registered variants
1455  *               / (DIGIT 3alphanum)
1456  *
1457  * extension     = singleton 1*("-" (2*8alphanum))
1458  *
1459  *                                     ; Single alphanumerics
1460  *                                     ; "x" reserved for private use
1461  * singleton     = DIGIT               ; 0 - 9
1462  *               / %x41-57             ; A - W
1463  *               / %x59-5A             ; Y - Z
1464  *               / %x61-77             ; a - w
1465  *               / %x79-7A             ; y - z
1466  *
1467  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1468  * The parser below doesn't try to cope with extension or privateuse
1469  * that could be added but that's not interoperable anyway
1470  *
1471  * Returns 1 if correct 0 otherwise
1472  **/
1473 int
xmlCheckLanguageID(const xmlChar * lang)1474 xmlCheckLanguageID(const xmlChar * lang)
1475 {
1476     const xmlChar *cur = lang, *nxt;
1477 
1478     if (cur == NULL)
1479         return (0);
1480     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481         ((cur[0] == 'I') && (cur[1] == '-')) ||
1482         ((cur[0] == 'x') && (cur[1] == '-')) ||
1483         ((cur[0] == 'X') && (cur[1] == '-'))) {
1484         /*
1485          * Still allow IANA code and user code which were coming
1486          * from the previous version of the XML-1.0 specification
1487          * it's deprecated but we should not fail
1488          */
1489         cur += 2;
1490         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492             cur++;
1493         return(cur[0] == 0);
1494     }
1495     nxt = cur;
1496     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498            nxt++;
1499     if (nxt - cur >= 4) {
1500         /*
1501          * Reserved
1502          */
1503         if ((nxt - cur > 8) || (nxt[0] != 0))
1504             return(0);
1505         return(1);
1506     }
1507     if (nxt - cur < 2)
1508         return(0);
1509     /* we got an ISO 639 code */
1510     if (nxt[0] == 0)
1511         return(1);
1512     if (nxt[0] != '-')
1513         return(0);
1514 
1515     nxt++;
1516     cur = nxt;
1517     /* now we can have extlang or script or region or variant */
1518     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519         goto region_m49;
1520 
1521     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523            nxt++;
1524     if (nxt - cur == 4)
1525         goto script;
1526     if (nxt - cur == 2)
1527         goto region;
1528     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529         goto variant;
1530     if (nxt - cur != 3)
1531         return(0);
1532     /* we parsed an extlang */
1533     if (nxt[0] == 0)
1534         return(1);
1535     if (nxt[0] != '-')
1536         return(0);
1537 
1538     nxt++;
1539     cur = nxt;
1540     /* now we can have script or region or variant */
1541     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542         goto region_m49;
1543 
1544     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546            nxt++;
1547     if (nxt - cur == 2)
1548         goto region;
1549     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550         goto variant;
1551     if (nxt - cur != 4)
1552         return(0);
1553     /* we parsed a script */
1554 script:
1555     if (nxt[0] == 0)
1556         return(1);
1557     if (nxt[0] != '-')
1558         return(0);
1559 
1560     nxt++;
1561     cur = nxt;
1562     /* now we can have region or variant */
1563     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564         goto region_m49;
1565 
1566     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568            nxt++;
1569 
1570     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571         goto variant;
1572     if (nxt - cur != 2)
1573         return(0);
1574     /* we parsed a region */
1575 region:
1576     if (nxt[0] == 0)
1577         return(1);
1578     if (nxt[0] != '-')
1579         return(0);
1580 
1581     nxt++;
1582     cur = nxt;
1583     /* now we can just have a variant */
1584     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586            nxt++;
1587 
1588     if ((nxt - cur < 5) || (nxt - cur > 8))
1589         return(0);
1590 
1591     /* we parsed a variant */
1592 variant:
1593     if (nxt[0] == 0)
1594         return(1);
1595     if (nxt[0] != '-')
1596         return(0);
1597     /* extensions and private use subtags not checked */
1598     return (1);
1599 
1600 region_m49:
1601     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603         nxt += 3;
1604         goto region;
1605     }
1606     return(0);
1607 }
1608 
1609 /************************************************************************
1610  *									*
1611  *		Parser stacks related functions and macros		*
1612  *									*
1613  ************************************************************************/
1614 
1615 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1616                                             const xmlChar ** str);
1617 
1618 #ifdef SAX2
1619 /**
1620  * nsPush:
1621  * @ctxt:  an XML parser context
1622  * @prefix:  the namespace prefix or NULL
1623  * @URL:  the namespace name
1624  *
1625  * Pushes a new parser namespace on top of the ns stack
1626  *
1627  * Returns -1 in case of error, -2 if the namespace should be discarded
1628  *	   and the index in the stack otherwise.
1629  */
1630 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1631 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1632 {
1633     if (ctxt->options & XML_PARSE_NSCLEAN) {
1634         int i;
1635 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636 	    if (ctxt->nsTab[i] == prefix) {
1637 		/* in scope */
1638 	        if (ctxt->nsTab[i + 1] == URL)
1639 		    return(-2);
1640 		/* out of scope keep it */
1641 		break;
1642 	    }
1643 	}
1644     }
1645     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646 	ctxt->nsMax = 10;
1647 	ctxt->nsNr = 0;
1648 	ctxt->nsTab = (const xmlChar **)
1649 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650 	if (ctxt->nsTab == NULL) {
1651 	    xmlErrMemory(ctxt, NULL);
1652 	    ctxt->nsMax = 0;
1653             return (-1);
1654 	}
1655     } else if (ctxt->nsNr >= ctxt->nsMax) {
1656         const xmlChar ** tmp;
1657         ctxt->nsMax *= 2;
1658         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660         if (tmp == NULL) {
1661             xmlErrMemory(ctxt, NULL);
1662 	    ctxt->nsMax /= 2;
1663             return (-1);
1664         }
1665 	ctxt->nsTab = tmp;
1666     }
1667     ctxt->nsTab[ctxt->nsNr++] = prefix;
1668     ctxt->nsTab[ctxt->nsNr++] = URL;
1669     return (ctxt->nsNr);
1670 }
1671 /**
1672  * nsPop:
1673  * @ctxt: an XML parser context
1674  * @nr:  the number to pop
1675  *
1676  * Pops the top @nr parser prefix/namespace from the ns stack
1677  *
1678  * Returns the number of namespaces removed
1679  */
1680 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1681 nsPop(xmlParserCtxtPtr ctxt, int nr)
1682 {
1683     int i;
1684 
1685     if (ctxt->nsTab == NULL) return(0);
1686     if (ctxt->nsNr < nr) {
1687         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688         nr = ctxt->nsNr;
1689     }
1690     if (ctxt->nsNr <= 0)
1691         return (0);
1692 
1693     for (i = 0;i < nr;i++) {
1694          ctxt->nsNr--;
1695 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1696     }
1697     return(nr);
1698 }
1699 #endif
1700 
1701 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1702 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1703     const xmlChar **atts;
1704     int *attallocs;
1705     int maxatts;
1706 
1707     if (ctxt->atts == NULL) {
1708 	maxatts = 55; /* allow for 10 attrs by default */
1709 	atts = (const xmlChar **)
1710 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1711 	if (atts == NULL) goto mem_error;
1712 	ctxt->atts = atts;
1713 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714 	if (attallocs == NULL) goto mem_error;
1715 	ctxt->attallocs = attallocs;
1716 	ctxt->maxatts = maxatts;
1717     } else if (nr + 5 > ctxt->maxatts) {
1718 	maxatts = (nr + 5) * 2;
1719 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720 				     maxatts * sizeof(const xmlChar *));
1721 	if (atts == NULL) goto mem_error;
1722 	ctxt->atts = atts;
1723 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724 	                             (maxatts / 5) * sizeof(int));
1725 	if (attallocs == NULL) goto mem_error;
1726 	ctxt->attallocs = attallocs;
1727 	ctxt->maxatts = maxatts;
1728     }
1729     return(ctxt->maxatts);
1730 mem_error:
1731     xmlErrMemory(ctxt, NULL);
1732     return(-1);
1733 }
1734 
1735 /**
1736  * inputPush:
1737  * @ctxt:  an XML parser context
1738  * @value:  the parser input
1739  *
1740  * Pushes a new parser input on top of the input stack
1741  *
1742  * Returns -1 in case of error, the index in the stack otherwise
1743  */
1744 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1745 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1746 {
1747     if ((ctxt == NULL) || (value == NULL))
1748         return(-1);
1749     if (ctxt->inputNr >= ctxt->inputMax) {
1750         ctxt->inputMax *= 2;
1751         ctxt->inputTab =
1752             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1753                                              ctxt->inputMax *
1754                                              sizeof(ctxt->inputTab[0]));
1755         if (ctxt->inputTab == NULL) {
1756             xmlErrMemory(ctxt, NULL);
1757 	    ctxt->inputMax /= 2;
1758             return (-1);
1759         }
1760     }
1761     ctxt->inputTab[ctxt->inputNr] = value;
1762     ctxt->input = value;
1763     return (ctxt->inputNr++);
1764 }
1765 /**
1766  * inputPop:
1767  * @ctxt: an XML parser context
1768  *
1769  * Pops the top parser input from the input stack
1770  *
1771  * Returns the input just removed
1772  */
1773 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1774 inputPop(xmlParserCtxtPtr ctxt)
1775 {
1776     xmlParserInputPtr ret;
1777 
1778     if (ctxt == NULL)
1779         return(NULL);
1780     if (ctxt->inputNr <= 0)
1781         return (NULL);
1782     ctxt->inputNr--;
1783     if (ctxt->inputNr > 0)
1784         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785     else
1786         ctxt->input = NULL;
1787     ret = ctxt->inputTab[ctxt->inputNr];
1788     ctxt->inputTab[ctxt->inputNr] = NULL;
1789     return (ret);
1790 }
1791 /**
1792  * nodePush:
1793  * @ctxt:  an XML parser context
1794  * @value:  the element node
1795  *
1796  * Pushes a new element node on top of the node stack
1797  *
1798  * Returns -1 in case of error, the index in the stack otherwise
1799  */
1800 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1801 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1802 {
1803     if (ctxt == NULL) return(0);
1804     if (ctxt->nodeNr >= ctxt->nodeMax) {
1805         xmlNodePtr *tmp;
1806 
1807 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808                                       ctxt->nodeMax * 2 *
1809                                       sizeof(ctxt->nodeTab[0]));
1810         if (tmp == NULL) {
1811             xmlErrMemory(ctxt, NULL);
1812             return (-1);
1813         }
1814         ctxt->nodeTab = tmp;
1815 	ctxt->nodeMax *= 2;
1816     }
1817     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1821 			  xmlParserMaxDepth);
1822 	xmlHaltParser(ctxt);
1823 	return(-1);
1824     }
1825     ctxt->nodeTab[ctxt->nodeNr] = value;
1826     ctxt->node = value;
1827     return (ctxt->nodeNr++);
1828 }
1829 
1830 /**
1831  * nodePop:
1832  * @ctxt: an XML parser context
1833  *
1834  * Pops the top element node from the node stack
1835  *
1836  * Returns the node just removed
1837  */
1838 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1839 nodePop(xmlParserCtxtPtr ctxt)
1840 {
1841     xmlNodePtr ret;
1842 
1843     if (ctxt == NULL) return(NULL);
1844     if (ctxt->nodeNr <= 0)
1845         return (NULL);
1846     ctxt->nodeNr--;
1847     if (ctxt->nodeNr > 0)
1848         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849     else
1850         ctxt->node = NULL;
1851     ret = ctxt->nodeTab[ctxt->nodeNr];
1852     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853     return (ret);
1854 }
1855 
1856 /**
1857  * nameNsPush:
1858  * @ctxt:  an XML parser context
1859  * @value:  the element name
1860  * @prefix:  the element prefix
1861  * @URI:  the element namespace name
1862  * @line:  the current line number for error messages
1863  * @nsNr:  the number of namespaces pushed on the namespace table
1864  *
1865  * Pushes a new element name/prefix/URL on top of the name stack
1866  *
1867  * Returns -1 in case of error, the index in the stack otherwise
1868  */
1869 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1870 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1871            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872 {
1873     xmlStartTag *tag;
1874 
1875     if (ctxt->nameNr >= ctxt->nameMax) {
1876         const xmlChar * *tmp;
1877         xmlStartTag *tmp2;
1878         ctxt->nameMax *= 2;
1879         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880                                     ctxt->nameMax *
1881                                     sizeof(ctxt->nameTab[0]));
1882         if (tmp == NULL) {
1883 	    ctxt->nameMax /= 2;
1884 	    goto mem_error;
1885         }
1886 	ctxt->nameTab = tmp;
1887         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888                                     ctxt->nameMax *
1889                                     sizeof(ctxt->pushTab[0]));
1890         if (tmp2 == NULL) {
1891 	    ctxt->nameMax /= 2;
1892 	    goto mem_error;
1893         }
1894 	ctxt->pushTab = tmp2;
1895     } else if (ctxt->pushTab == NULL) {
1896         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897                                             sizeof(ctxt->pushTab[0]));
1898         if (ctxt->pushTab == NULL)
1899             goto mem_error;
1900     }
1901     ctxt->nameTab[ctxt->nameNr] = value;
1902     ctxt->name = value;
1903     tag = &ctxt->pushTab[ctxt->nameNr];
1904     tag->prefix = prefix;
1905     tag->URI = URI;
1906     tag->line = line;
1907     tag->nsNr = nsNr;
1908     return (ctxt->nameNr++);
1909 mem_error:
1910     xmlErrMemory(ctxt, NULL);
1911     return (-1);
1912 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack. The entry below it, if there
 * is one, becomes the current name (ctxt->name); otherwise the current
 * name is reset to NULL. The start-tag table is left untouched.
 *
 * Returns the name just removed, or NULL when the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1939 
1940 /**
1941  * namePush:
1942  * @ctxt:  an XML parser context
1943  * @value:  the element name
1944  *
1945  * Pushes a new element name on top of the name stack
1946  *
1947  * Returns -1 in case of error, the index in the stack otherwise
1948  */
1949 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1950 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1951 {
1952     if (ctxt == NULL) return (-1);
1953 
1954     if (ctxt->nameNr >= ctxt->nameMax) {
1955         const xmlChar * *tmp;
1956         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957                                     ctxt->nameMax * 2 *
1958                                     sizeof(ctxt->nameTab[0]));
1959         if (tmp == NULL) {
1960 	    goto mem_error;
1961         }
1962 	ctxt->nameTab = tmp;
1963         ctxt->nameMax *= 2;
1964     }
1965     ctxt->nameTab[ctxt->nameNr] = value;
1966     ctxt->name = value;
1967     return (ctxt->nameNr++);
1968 mem_error:
1969     xmlErrMemory(ctxt, NULL);
1970     return (-1);
1971 }
1972 /**
1973  * namePop:
1974  * @ctxt: an XML parser context
1975  *
1976  * Pops the top element name from the name stack
1977  *
1978  * Returns the name just removed
1979  */
1980 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1981 namePop(xmlParserCtxtPtr ctxt)
1982 {
1983     const xmlChar *ret;
1984 
1985     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986         return (NULL);
1987     ctxt->nameNr--;
1988     if (ctxt->nameNr > 0)
1989         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990     else
1991         ctxt->name = NULL;
1992     ret = ctxt->nameTab[ctxt->nameNr];
1993     ctxt->nameTab[ctxt->nameNr] = NULL;
1994     return (ret);
1995 }
1996 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack (ctxt->spaceTab), doubling the
 * stack's capacity first when it is full, and points ctxt->space at the
 * new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grown = ctxt->spaceMax * 2;
        int *bigger;

        bigger = (int *) xmlRealloc(ctxt->spaceTab,
                                    grown * sizeof(ctxt->spaceTab[0]));
        if (bigger == NULL) {
            /* capacity and table are left as they were */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = bigger;
        ctxt->spaceMax = grown;
    }

    top = ctxt->spaceNr;
    ctxt->spaceTab[top] = val;
    ctxt->space = &ctxt->spaceTab[top];
    ctxt->spaceNr = top + 1;
    return(top);
}
2015 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. ctxt->space is re-pointed
 * at the entry below it, or at slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 when the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2028 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   Skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *            increments the index
 *   GROW, SHRINK  handling of input buffers
 */
2063 
/*
 * Accessors for the active input. They all expand to expressions on a
 * variable named ctxt that must be in scope at the point of use.
 */
/* Byte at the current read position. */
#define RAW (*ctxt->input->cur)
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Current read position inside the input buffer. */
#define CUR_PTR ctxt->input->cur
/* Start of the input buffer. */
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are
 * exactly c1 ... cn, compared as unsigned char. The comparison stops at
 * the first mismatch. s is evaluated once per byte compared, so it must
 * not have side effects. Every parameter is parenthesized so that
 * expression arguments (e.g. a conditional) are cast and compared as a
 * whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2087 
/*
 * SKIP(val): advance the read position and the column counter by val,
 * then ask for more input when the new position holds a 0 byte. The
 * line counter is not touched, so the skipped bytes must not contain
 * newlines. val is evaluated twice and must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the read position by val bytes one at a time,
 * keeping both the line and column counters up to date, then ask for
 * more input when the new position holds a 0 byte. val is parenthesized
 * in the loop test so that an expression argument is compared as a
 * whole; it is re-evaluated on every iteration and must not have side
 * effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2105 
2106 #define SHRINK if ((ctxt->progressive == 0) &&				\
2107 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2108 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2109 	xmlSHRINK (ctxt);
2110 
xmlSHRINK(xmlParserCtxtPtr ctxt)2111 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2112     xmlParserInputShrink(ctxt->input);
2113     if (*ctxt->input->cur == 0)
2114         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 }
2116 
2117 #define GROW if ((ctxt->progressive == 0) &&				\
2118 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2119 	xmlGROW (ctxt);
2120 
xmlGROW(xmlParserCtxtPtr ctxt)2121 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2122     ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2123     ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2124 
2125     if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2126          (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2127          ((ctxt->input->buf) &&
2128           (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2129         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2130         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2131         xmlHaltParser(ctxt);
2132 	return;
2133     }
2134     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2135     if ((ctxt->input->cur > ctxt->input->end) ||
2136         (ctxt->input->cur < ctxt->input->base)) {
2137         xmlHaltParser(ctxt);
2138         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2139 	return;
2140     }
2141     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2142         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2143 }
2144 
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the read position and the column counter by one byte,
 * then ask for more input when the new position holds a 0 byte. The
 * line counter is not touched.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which occupies l bytes,
 * updating the line/column counters according to whether the byte at
 * the read position is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Current character through xmlCurrentChar(); l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same on the string s, through xmlStringCurrentChar(). */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written through xmlCopyCharMultiByte(). All
 * parameters are parenthesized so expression arguments keep their
 * meaning; i and b are evaluated more than once. The expansion is an
 * if/else statement without its final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/*
 * Sum of the input's consumed counter and the offset of the read
 * position from the buffer base.
 */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172 
2173 /**
2174  * xmlSkipBlankChars:
2175  * @ctxt:  the XML parser context
2176  *
2177  * skip all blanks character found at that point in the input streams.
2178  * It pops up finished entities in the process if allowable at that point.
2179  *
2180  * Returns the number of space chars skipped
2181  */
2182 
2183 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2184 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2185     int res = 0;
2186 
2187     /*
2188      * It's Okay to use CUR/NEXT here since all the blanks are on
2189      * the ASCII range.
2190      */
2191     if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192         (ctxt->instate == XML_PARSER_START)) {
2193 	const xmlChar *cur;
2194 	/*
2195 	 * if we are in the document content, go really fast
2196 	 */
2197 	cur = ctxt->input->cur;
2198 	while (IS_BLANK_CH(*cur)) {
2199 	    if (*cur == '\n') {
2200 		ctxt->input->line++; ctxt->input->col = 1;
2201 	    } else {
2202 		ctxt->input->col++;
2203 	    }
2204 	    cur++;
2205 	    if (res < INT_MAX)
2206 		res++;
2207 	    if (*cur == 0) {
2208 		ctxt->input->cur = cur;
2209 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2210 		cur = ctxt->input->cur;
2211 	    }
2212 	}
2213 	ctxt->input->cur = cur;
2214     } else {
2215         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216 
2217 	while (1) {
2218             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219 		NEXT;
2220 	    } else if (CUR == '%') {
2221                 /*
2222                  * Need to handle support of entities branching here
2223                  */
2224 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225                     break;
2226 	        xmlParsePEReference(ctxt);
2227             } else if (CUR == 0) {
2228                 if (ctxt->inputNr <= 1)
2229                     break;
2230                 xmlPopInput(ctxt);
2231             } else {
2232                 break;
2233             }
2234 
2235             /*
2236              * Also increase the counter when entering or exiting a PERef.
2237              * The spec says: "When a parameter-entity reference is recognized
2238              * in the DTD and included, its replacement text MUST be enlarged
2239              * by the attachment of one leading and one following space (#x20)
2240              * character."
2241              */
2242 	    if (res < INT_MAX)
2243 		res++;
2244         }
2245     }
2246     return(res);
2247 }
2248 
2249 /************************************************************************
2250  *									*
2251  *		Commodity functions to handle entities			*
2252  *									*
2253  ************************************************************************/
2254 
2255 /**
2256  * xmlPopInput:
2257  * @ctxt:  an XML parser context
2258  *
2259  * xmlPopInput: the current input pointed by ctxt->input came to an end
2260  *          pop it and return the next char.
2261  *
2262  * Returns the current xmlChar in the parser context
2263  */
2264 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2265 xmlPopInput(xmlParserCtxtPtr ctxt) {
2266     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2267     if (xmlParserDebugEntities)
2268 	xmlGenericError(xmlGenericErrorContext,
2269 		"Popping input %d\n", ctxt->inputNr);
2270     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271         (ctxt->instate != XML_PARSER_EOF))
2272         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2273                     "Unfinished entity outside the DTD");
2274     xmlFreeInputStream(inputPop(ctxt));
2275     if (*ctxt->input->cur == 0)
2276         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2277     return(CUR);
2278 }
2279 
2280 /**
2281  * xmlPushInput:
2282  * @ctxt:  an XML parser context
2283  * @input:  an XML parser input fragment (entity, XML fragment ...).
2284  *
2285  * xmlPushInput: switch to a new input stream which is stacked on top
2286  *               of the previous one(s).
2287  * Returns -1 in case of error or the index in the input stack
2288  */
2289 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2290 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2291     int ret;
2292     if (input == NULL) return(-1);
2293 
2294     if (xmlParserDebugEntities) {
2295 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2296 	    xmlGenericError(xmlGenericErrorContext,
2297 		    "%s(%d): ", ctxt->input->filename,
2298 		    ctxt->input->line);
2299 	xmlGenericError(xmlGenericErrorContext,
2300 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301     }
2302     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303         (ctxt->inputNr > 1024)) {
2304         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2305         while (ctxt->inputNr > 1)
2306             xmlFreeInputStream(inputPop(ctxt));
2307 	return(-1);
2308     }
2309     ret = inputPush(ctxt, input);
2310     if (ctxt->instate == XML_PARSER_EOF)
2311         return(-1);
2312     GROW;
2313     return(ret);
2314 }
2315 
2316 /**
2317  * xmlParseCharRef:
2318  * @ctxt:  an XML parser context
2319  *
2320  * parse Reference declarations
2321  *
2322  * [66] CharRef ::= '&#' [0-9]+ ';' |
2323  *                  '&#x' [0-9a-fA-F]+ ';'
2324  *
2325  * [ WFC: Legal Character ]
2326  * Characters referred to using character references must match the
2327  * production for Char.
2328  *
2329  * Returns the value parsed (as an int), 0 in case of error
2330  */
2331 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2332 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2333     int val = 0;
2334     int count = 0;
2335 
2336     /*
2337      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338      */
2339     if ((RAW == '&') && (NXT(1) == '#') &&
2340         (NXT(2) == 'x')) {
2341 	SKIP(3);
2342 	GROW;
2343 	while (RAW != ';') { /* loop blocked by count */
2344 	    if (count++ > 20) {
2345 		count = 0;
2346 		GROW;
2347                 if (ctxt->instate == XML_PARSER_EOF)
2348                     return(0);
2349 	    }
2350 	    if ((RAW >= '0') && (RAW <= '9'))
2351 	        val = val * 16 + (CUR - '0');
2352 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353 	        val = val * 16 + (CUR - 'a') + 10;
2354 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355 	        val = val * 16 + (CUR - 'A') + 10;
2356 	    else {
2357 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2358 		val = 0;
2359 		break;
2360 	    }
2361 	    if (val > 0x110000)
2362 	        val = 0x110000;
2363 
2364 	    NEXT;
2365 	    count++;
2366 	}
2367 	if (RAW == ';') {
2368 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369 	    ctxt->input->col++;
2370 	    ctxt->input->cur++;
2371 	}
2372     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2373 	SKIP(2);
2374 	GROW;
2375 	while (RAW != ';') { /* loop blocked by count */
2376 	    if (count++ > 20) {
2377 		count = 0;
2378 		GROW;
2379                 if (ctxt->instate == XML_PARSER_EOF)
2380                     return(0);
2381 	    }
2382 	    if ((RAW >= '0') && (RAW <= '9'))
2383 	        val = val * 10 + (CUR - '0');
2384 	    else {
2385 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2386 		val = 0;
2387 		break;
2388 	    }
2389 	    if (val > 0x110000)
2390 	        val = 0x110000;
2391 
2392 	    NEXT;
2393 	    count++;
2394 	}
2395 	if (RAW == ';') {
2396 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397 	    ctxt->input->col++;
2398 	    ctxt->input->cur++;
2399 	}
2400     } else {
2401         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2402     }
2403 
2404     /*
2405      * [ WFC: Legal Character ]
2406      * Characters referred to using character references must match the
2407      * production for Char.
2408      */
2409     if (val >= 0x110000) {
2410         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411                 "xmlParseCharRef: character reference out of bounds\n",
2412 	        val);
2413     } else if (IS_CHAR(val)) {
2414         return(val);
2415     } else {
2416         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417                           "xmlParseCharRef: invalid xmlChar value %d\n",
2418 	                  val);
2419     }
2420     return(0);
2421 }
2422 
2423 /**
2424  * xmlParseStringCharRef:
2425  * @ctxt:  an XML parser context
2426  * @str:  a pointer to an index in the string
2427  *
2428  * parse Reference declarations, variant parsing from a string rather
2429  * than an an input flow.
2430  *
2431  * [66] CharRef ::= '&#' [0-9]+ ';' |
2432  *                  '&#x' [0-9a-fA-F]+ ';'
2433  *
2434  * [ WFC: Legal Character ]
2435  * Characters referred to using character references must match the
2436  * production for Char.
2437  *
2438  * Returns the value parsed (as an int), 0 in case of error, str will be
2439  *         updated to the current value of the index
2440  */
2441 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2442 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2443     const xmlChar *ptr;
2444     xmlChar cur;
2445     int val = 0;
2446 
2447     if ((str == NULL) || (*str == NULL)) return(0);
2448     ptr = *str;
2449     cur = *ptr;
2450     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451 	ptr += 3;
2452 	cur = *ptr;
2453 	while (cur != ';') { /* Non input consuming loop */
2454 	    if ((cur >= '0') && (cur <= '9'))
2455 	        val = val * 16 + (cur - '0');
2456 	    else if ((cur >= 'a') && (cur <= 'f'))
2457 	        val = val * 16 + (cur - 'a') + 10;
2458 	    else if ((cur >= 'A') && (cur <= 'F'))
2459 	        val = val * 16 + (cur - 'A') + 10;
2460 	    else {
2461 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2462 		val = 0;
2463 		break;
2464 	    }
2465 	    if (val > 0x110000)
2466 	        val = 0x110000;
2467 
2468 	    ptr++;
2469 	    cur = *ptr;
2470 	}
2471 	if (cur == ';')
2472 	    ptr++;
2473     } else if  ((cur == '&') && (ptr[1] == '#')){
2474 	ptr += 2;
2475 	cur = *ptr;
2476 	while (cur != ';') { /* Non input consuming loops */
2477 	    if ((cur >= '0') && (cur <= '9'))
2478 	        val = val * 10 + (cur - '0');
2479 	    else {
2480 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2481 		val = 0;
2482 		break;
2483 	    }
2484 	    if (val > 0x110000)
2485 	        val = 0x110000;
2486 
2487 	    ptr++;
2488 	    cur = *ptr;
2489 	}
2490 	if (cur == ';')
2491 	    ptr++;
2492     } else {
2493 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2494 	return(0);
2495     }
2496     *str = ptr;
2497 
2498     /*
2499      * [ WFC: Legal Character ]
2500      * Characters referred to using character references must match the
2501      * production for Char.
2502      */
2503     if (val >= 0x110000) {
2504         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505                 "xmlParseStringCharRef: character reference out of bounds\n",
2506                 val);
2507     } else if (IS_CHAR(val)) {
2508         return(val);
2509     } else {
2510         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512 			  val);
2513     }
2514     return(0);
2515 }
2516 
2517 /**
2518  * xmlParserHandlePEReference:
2519  * @ctxt:  the parser context
2520  *
2521  * [69] PEReference ::= '%' Name ';'
2522  *
2523  * [ WFC: No Recursion ]
2524  * A parsed entity must not contain a recursive
2525  * reference to itself, either directly or indirectly.
2526  *
2527  * [ WFC: Entity Declared ]
2528  * In a document without any DTD, a document with only an internal DTD
2529  * subset which contains no parameter entity references, or a document
2530  * with "standalone='yes'", ...  ... The declaration of a parameter
2531  * entity must precede any reference to it...
2532  *
2533  * [ VC: Entity Declared ]
2534  * In a document with an external subset or external parameter entities
2535  * with "standalone='no'", ...  ... The declaration of a parameter entity
2536  * must precede any reference to it...
2537  *
2538  * [ WFC: In DTD ]
2539  * Parameter-entity references may only appear in the DTD.
2540  * NOTE: misleading but this is handled.
2541  *
2542  * A PEReference may have been detected in the current input stream
2543  * the handling is done accordingly to
2544  *      http://www.w3.org/TR/REC-xml#entproc
2545  * i.e.
2546  *   - Included in literal in entity values
2547  *   - Included as Parameter Entity reference within DTDs
2548  */
2549 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2550 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2551     switch(ctxt->instate) {
2552 	case XML_PARSER_CDATA_SECTION:
2553 	    return;
2554         case XML_PARSER_COMMENT:
2555 	    return;
2556 	case XML_PARSER_START_TAG:
2557 	    return;
2558 	case XML_PARSER_END_TAG:
2559 	    return;
2560         case XML_PARSER_EOF:
2561 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2562 	    return;
2563         case XML_PARSER_PROLOG:
2564 	case XML_PARSER_START:
2565 	case XML_PARSER_MISC:
2566 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2567 	    return;
2568 	case XML_PARSER_ENTITY_DECL:
2569         case XML_PARSER_CONTENT:
2570         case XML_PARSER_ATTRIBUTE_VALUE:
2571         case XML_PARSER_PI:
2572 	case XML_PARSER_SYSTEM_LITERAL:
2573 	case XML_PARSER_PUBLIC_LITERAL:
2574 	    /* we just ignore it there */
2575 	    return;
2576         case XML_PARSER_EPILOG:
2577 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2578 	    return;
2579 	case XML_PARSER_ENTITY_VALUE:
2580 	    /*
2581 	     * NOTE: in the case of entity values, we don't do the
2582 	     *       substitution here since we need the literal
2583 	     *       entity value to be able to save the internal
2584 	     *       subset of the document.
2585 	     *       This will be handled by xmlStringDecodeEntities
2586 	     */
2587 	    return;
2588         case XML_PARSER_DTD:
2589 	    /*
2590 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591 	     * In the internal DTD subset, parameter-entity references
2592 	     * can occur only where markup declarations can occur, not
2593 	     * within markup declarations.
2594 	     * In that case this is handled in xmlParseMarkupDecl
2595 	     */
2596 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597 		return;
2598 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599 		return;
2600             break;
2601         case XML_PARSER_IGNORE:
2602             return;
2603     }
2604 
2605     xmlParsePEReference(ctxt);
2606 }
2607 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or when the reallocation
 * fails, in which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2622 
2623 /**
2624  * xmlStringLenDecodeEntities:
2625  * @ctxt:  the parser context
2626  * @str:  the input string
2627  * @len: the string length
2628  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2629  * @end:  an end marker xmlChar, 0 if none
2630  * @end2:  an end marker xmlChar, 0 if none
2631  * @end3:  an end marker xmlChar, 0 if none
2632  *
2633  * Takes a entity string content and process to do the adequate substitutions.
2634  *
2635  * [67] Reference ::= EntityRef | CharRef
2636  *
2637  * [69] PEReference ::= '%' Name ';'
2638  *
2639  * Returns A newly allocated string with the substitution done. The caller
2640  *      must deallocate it !
2641  */
2642 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2643 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2644 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2645     xmlChar *buffer = NULL;
2646     size_t buffer_size = 0;
2647     size_t nbchars = 0;
2648 
2649     xmlChar *current = NULL;
2650     xmlChar *rep = NULL;
2651     const xmlChar *last;
2652     xmlEntityPtr ent;
2653     int c,l;
2654 
2655     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656 	return(NULL);
2657     last = str + len;
2658 
2659     if (((ctxt->depth > 40) &&
2660          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661 	(ctxt->depth > 1024)) {
2662 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2663 	return(NULL);
2664     }
2665 
2666     /*
2667      * allocate a translation buffer.
2668      */
2669     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2670     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2671     if (buffer == NULL) goto mem_error;
2672 
2673     /*
2674      * OK loop until we reach one of the ending char or a size limit.
2675      * we are operating on already parsed values.
2676      */
2677     if (str < last)
2678 	c = CUR_SCHAR(str, l);
2679     else
2680         c = 0;
2681     while ((c != 0) && (c != end) && /* non input consuming loop */
2682            (c != end2) && (c != end3) &&
2683            (ctxt->instate != XML_PARSER_EOF)) {
2684 
2685 	if (c == 0) break;
2686         if ((c == '&') && (str[1] == '#')) {
2687 	    int val = xmlParseStringCharRef(ctxt, &str);
2688 	    if (val == 0)
2689                 goto int_error;
2690 	    COPY_BUF(0,buffer,nbchars,val);
2691 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2692 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2693 	    }
2694 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2695 	    if (xmlParserDebugEntities)
2696 		xmlGenericError(xmlGenericErrorContext,
2697 			"String decoding Entity Reference: %.30s\n",
2698 			str);
2699 	    ent = xmlParseStringEntityRef(ctxt, &str);
2700 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2701 	    if (ent != NULL)
2702 	        ctxt->nbentities += ent->checked / 2;
2703 	    if ((ent != NULL) &&
2704 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2705 		if (ent->content != NULL) {
2706 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2708 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2709 		    }
2710 		} else {
2711 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712 			    "predefined entity has no content\n");
2713                     goto int_error;
2714 		}
2715 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2716 		ctxt->depth++;
2717 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718 			                      0, 0, 0);
2719 		ctxt->depth--;
2720 		if (rep == NULL) {
2721                     ent->content[0] = 0;
2722                     goto int_error;
2723                 }
2724 
2725                 current = rep;
2726                 while (*current != 0) { /* non input consuming loop */
2727                     buffer[nbchars++] = *current++;
2728                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730                             goto int_error;
2731                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2732                     }
2733                 }
2734                 xmlFree(rep);
2735                 rep = NULL;
2736 	    } else if (ent != NULL) {
2737 		int i = xmlStrlen(ent->name);
2738 		const xmlChar *cur = ent->name;
2739 
2740 		buffer[nbchars++] = '&';
2741 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2742 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2743 		}
2744 		for (;i > 0;i--)
2745 		    buffer[nbchars++] = *cur++;
2746 		buffer[nbchars++] = ';';
2747 	    }
2748 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2749 	    if (xmlParserDebugEntities)
2750 		xmlGenericError(xmlGenericErrorContext,
2751 			"String decoding PE Reference: %.30s\n", str);
2752 	    ent = xmlParseStringPEReference(ctxt, &str);
2753 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2754 	    if (ent != NULL)
2755 	        ctxt->nbentities += ent->checked / 2;
2756 	    if (ent != NULL) {
2757                 if (ent->content == NULL) {
2758 		    /*
2759 		     * Note: external parsed entities will not be loaded,
2760 		     * it is not required for a non-validating parser to
2761 		     * complete external PEReferences coming from the
2762 		     * internal subset
2763 		     */
2764 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766 			(ctxt->validate != 0)) {
2767 			xmlLoadEntityContent(ctxt, ent);
2768 		    } else {
2769 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770 		  "not validating will not read content for PE entity %s\n",
2771 		                      ent->name, NULL);
2772 		    }
2773 		}
2774 		ctxt->depth++;
2775 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776 			                      0, 0, 0);
2777 		ctxt->depth--;
2778 		if (rep == NULL) {
2779                     if (ent->content != NULL)
2780                         ent->content[0] = 0;
2781                     goto int_error;
2782                 }
2783                 current = rep;
2784                 while (*current != 0) { /* non input consuming loop */
2785                     buffer[nbchars++] = *current++;
2786                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788                             goto int_error;
2789                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790                     }
2791                 }
2792                 xmlFree(rep);
2793                 rep = NULL;
2794 	    }
2795 	} else {
2796 	    COPY_BUF(l,buffer,nbchars,c);
2797 	    str += l;
2798 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2799 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800 	    }
2801 	}
2802 	if (str < last)
2803 	    c = CUR_SCHAR(str, l);
2804 	else
2805 	    c = 0;
2806     }
2807     buffer[nbchars] = 0;
2808     return(buffer);
2809 
2810 mem_error:
2811     xmlErrMemory(ctxt, NULL);
2812 int_error:
2813     if (rep != NULL)
2814         xmlFree(rep);
2815     if (buffer != NULL)
2816         xmlFree(buffer);
2817     return(NULL);
2818 }
2819 
2820 /**
2821  * xmlStringDecodeEntities:
2822  * @ctxt:  the parser context
2823  * @str:  the input string
2824  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825  * @end:  an end marker xmlChar, 0 if none
2826  * @end2:  an end marker xmlChar, 0 if none
2827  * @end3:  an end marker xmlChar, 0 if none
2828  *
2829  * Takes a entity string content and process to do the adequate substitutions.
2830  *
2831  * [67] Reference ::= EntityRef | CharRef
2832  *
2833  * [69] PEReference ::= '%' Name ';'
2834  *
2835  * Returns A newly allocated string with the substitution done. The caller
2836  *      must deallocate it !
2837  */
2838 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2839 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2841     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843            end, end2, end3));
2844 }
2845 
2846 /************************************************************************
2847  *									*
2848  *		Commodity functions, cleanup needed ?			*
2849  *									*
2850  ************************************************************************/
2851 
2852 /**
2853  * areBlanks:
2854  * @ctxt:  an XML parser context
2855  * @str:  a xmlChar *
2856  * @len:  the size of @str
2857  * @blank_chars: we know the chars are blanks
2858  *
2859  * Is this a sequence of blank chars that one can ignore ?
2860  *
2861  * Returns 1 if ignorable 0 otherwise.
2862  */
2863 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2864 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865                      int blank_chars) {
2866     int i, ret;
2867     xmlNodePtr lastChild;
2868 
2869     /*
2870      * Don't spend time trying to differentiate them, the same callback is
2871      * used !
2872      */
2873     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874 	return(0);
2875 
2876     /*
2877      * Check for xml:space value.
2878      */
2879     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880         (*(ctxt->space) == -2))
2881 	return(0);
2882 
2883     /*
2884      * Check that the string is made of blanks
2885      */
2886     if (blank_chars == 0) {
2887 	for (i = 0;i < len;i++)
2888 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2889     }
2890 
2891     /*
2892      * Look if the element is mixed content in the DTD if available
2893      */
2894     if (ctxt->node == NULL) return(0);
2895     if (ctxt->myDoc != NULL) {
2896 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897         if (ret == 0) return(1);
2898         if (ret == 1) return(0);
2899     }
2900 
2901     /*
2902      * Otherwise, heuristic :-\
2903      */
2904     if ((RAW != '<') && (RAW != 0xD)) return(0);
2905     if ((ctxt->node->children == NULL) &&
2906 	(RAW == '<') && (NXT(1) == '/')) return(0);
2907 
2908     lastChild = xmlGetLastChild(ctxt->node);
2909     if (lastChild == NULL) {
2910         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911             (ctxt->node->content != NULL)) return(0);
2912     } else if (xmlNodeIsText(lastChild))
2913         return(0);
2914     else if ((ctxt->node->children != NULL) &&
2915              (xmlNodeIsText(ctxt->node->children)))
2916         return(0);
2917     return(1);
2918 }
2919 
2920 /************************************************************************
2921  *									*
2922  *		Extra stuff for namespace support			*
2923  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2924  *									*
2925  ************************************************************************/
2926 
2927 /**
2928  * xmlSplitQName:
2929  * @ctxt:  an XML parser context
2930  * @name:  an XML parser context
2931  * @prefix:  a xmlChar **
2932  *
2933  * parse an UTF8 encoded XML qualified name string
2934  *
2935  * [NS 5] QName ::= (Prefix ':')? LocalPart
2936  *
2937  * [NS 6] Prefix ::= NCName
2938  *
2939  * [NS 7] LocalPart ::= NCName
2940  *
2941  * Returns the local part, and prefix is updated
2942  *   to get the Prefix if any.
2943  */
2944 
2945 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2946 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2947     xmlChar buf[XML_MAX_NAMELEN + 5];
2948     xmlChar *buffer = NULL;
2949     int len = 0;
2950     int max = XML_MAX_NAMELEN;
2951     xmlChar *ret = NULL;
2952     const xmlChar *cur = name;
2953     int c;
2954 
2955     if (prefix == NULL) return(NULL);
2956     *prefix = NULL;
2957 
2958     if (cur == NULL) return(NULL);
2959 
2960 #ifndef XML_XML_NAMESPACE
2961     /* xml: prefix is not really a namespace */
2962     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963         (cur[2] == 'l') && (cur[3] == ':'))
2964 	return(xmlStrdup(name));
2965 #endif
2966 
2967     /* nasty but well=formed */
2968     if (cur[0] == ':')
2969 	return(xmlStrdup(name));
2970 
2971     c = *cur++;
2972     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973 	buf[len++] = c;
2974 	c = *cur++;
2975     }
2976     if (len >= max) {
2977 	/*
2978 	 * Okay someone managed to make a huge name, so he's ready to pay
2979 	 * for the processing speed.
2980 	 */
2981 	max = len * 2;
2982 
2983 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984 	if (buffer == NULL) {
2985 	    xmlErrMemory(ctxt, NULL);
2986 	    return(NULL);
2987 	}
2988 	memcpy(buffer, buf, len);
2989 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990 	    if (len + 10 > max) {
2991 	        xmlChar *tmp;
2992 
2993 		max *= 2;
2994 		tmp = (xmlChar *) xmlRealloc(buffer,
2995 						max * sizeof(xmlChar));
2996 		if (tmp == NULL) {
2997 		    xmlFree(buffer);
2998 		    xmlErrMemory(ctxt, NULL);
2999 		    return(NULL);
3000 		}
3001 		buffer = tmp;
3002 	    }
3003 	    buffer[len++] = c;
3004 	    c = *cur++;
3005 	}
3006 	buffer[len] = 0;
3007     }
3008 
3009     if ((c == ':') && (*cur == 0)) {
3010         if (buffer != NULL)
3011 	    xmlFree(buffer);
3012 	*prefix = NULL;
3013 	return(xmlStrdup(name));
3014     }
3015 
3016     if (buffer == NULL)
3017 	ret = xmlStrndup(buf, len);
3018     else {
3019 	ret = buffer;
3020 	buffer = NULL;
3021 	max = XML_MAX_NAMELEN;
3022     }
3023 
3024 
3025     if (c == ':') {
3026 	c = *cur;
3027         *prefix = ret;
3028 	if (c == 0) {
3029 	    return(xmlStrndup(BAD_CAST "", 0));
3030 	}
3031 	len = 0;
3032 
3033 	/*
3034 	 * Check that the first character is proper to start
3035 	 * a new name
3036 	 */
3037 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038 	      ((c >= 0x41) && (c <= 0x5A)) ||
3039 	      (c == '_') || (c == ':'))) {
3040 	    int l;
3041 	    int first = CUR_SCHAR(cur, l);
3042 
3043 	    if (!IS_LETTER(first) && (first != '_')) {
3044 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045 			    "Name %s is not XML Namespace compliant\n",
3046 				  name);
3047 	    }
3048 	}
3049 	cur++;
3050 
3051 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052 	    buf[len++] = c;
3053 	    c = *cur++;
3054 	}
3055 	if (len >= max) {
3056 	    /*
3057 	     * Okay someone managed to make a huge name, so he's ready to pay
3058 	     * for the processing speed.
3059 	     */
3060 	    max = len * 2;
3061 
3062 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063 	    if (buffer == NULL) {
3064 	        xmlErrMemory(ctxt, NULL);
3065 		return(NULL);
3066 	    }
3067 	    memcpy(buffer, buf, len);
3068 	    while (c != 0) { /* tested bigname2.xml */
3069 		if (len + 10 > max) {
3070 		    xmlChar *tmp;
3071 
3072 		    max *= 2;
3073 		    tmp = (xmlChar *) xmlRealloc(buffer,
3074 						    max * sizeof(xmlChar));
3075 		    if (tmp == NULL) {
3076 			xmlErrMemory(ctxt, NULL);
3077 			xmlFree(buffer);
3078 			return(NULL);
3079 		    }
3080 		    buffer = tmp;
3081 		}
3082 		buffer[len++] = c;
3083 		c = *cur++;
3084 	    }
3085 	    buffer[len] = 0;
3086 	}
3087 
3088 	if (buffer == NULL)
3089 	    ret = xmlStrndup(buf, len);
3090 	else {
3091 	    ret = buffer;
3092 	}
3093     }
3094 
3095     return(ret);
3096 }
3097 
3098 /************************************************************************
3099  *									*
3100  *			The parser itself				*
3101  *	Relates to http://www.w3.org/TR/REC-xml				*
3102  *									*
3103  ************************************************************************/
3104 
3105 /************************************************************************
3106  *									*
3107  *	Routines to parse Name, NCName and NmToken			*
3108  *									*
3109  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name parsing routine below. They only exist in
 * DEBUG builds and are bumped on entry of the matching function.
 */
static unsigned long nbParseName = 0,
                     nbParseNmToken = 0,
                     nbParseNCName = 0,
                     nbParseNCNameComplex = 0,
                     nbParseNameComplex = 0,
                     nbParseStringName = 0;
#endif
3118 
3119 /*
3120  * The two following functions are related to the change of accepted
3121  * characters for Name and NmToken in the Revision 5 of XML-1.0
3122  * They correspond to the modified production [4] and the new production [4a]
3123  * changes in that revision. Also note that the macros used for the
3124  * productions Letter, Digit, CombiningChar and Extender are not needed
3125  * anymore.
3126  * We still keep compatibility to pre-revision5 parsing semantic if the
3127  * new XML_PARSE_OLD10 option is given to the parser.
3128  */
3129 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3130 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3131     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132         /*
3133 	 * Use the new checks of production [4] [4a] amd [5] of the
3134 	 * Update 5 of XML-1.0
3135 	 */
3136 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137 	    (((c >= 'a') && (c <= 'z')) ||
3138 	     ((c >= 'A') && (c <= 'Z')) ||
3139 	     (c == '_') || (c == ':') ||
3140 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3141 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3142 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3143 	     ((c >= 0x370) && (c <= 0x37D)) ||
3144 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3146 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3147 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 	    return(1);
3153     } else {
3154         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155 	    return(1);
3156     }
3157     return(0);
3158 }
3159 
3160 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3161 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3162     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163         /*
3164 	 * Use the new checks of production [4] [4a] amd [5] of the
3165 	 * Update 5 of XML-1.0
3166 	 */
3167 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168 	    (((c >= 'a') && (c <= 'z')) ||
3169 	     ((c >= 'A') && (c <= 'Z')) ||
3170 	     ((c >= '0') && (c <= '9')) || /* !start */
3171 	     (c == '_') || (c == ':') ||
3172 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3174 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3175 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3176 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177 	     ((c >= 0x370) && (c <= 0x37D)) ||
3178 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3180 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3182 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3187 	     return(1);
3188     } else {
3189         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190             (c == '.') || (c == '-') ||
3191 	    (c == '_') || (c == ':') ||
3192 	    (IS_COMBINING(c)) ||
3193 	    (IS_EXTENDER(c)))
3194 	    return(1);
3195     }
3196     return(0);
3197 }
3198 
3199 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3200                                           int *len, int *alloc, int normalize);
3201 
3202 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3203 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3204     int len = 0, l;
3205     int c;
3206     int count = 0;
3207     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3208                     XML_MAX_TEXT_LENGTH :
3209                     XML_MAX_NAME_LENGTH;
3210 
3211 #ifdef DEBUG
3212     nbParseNameComplex++;
3213 #endif
3214 
3215     /*
3216      * Handler for more complex cases
3217      */
3218     GROW;
3219     if (ctxt->instate == XML_PARSER_EOF)
3220         return(NULL);
3221     c = CUR_CHAR(l);
3222     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223         /*
3224 	 * Use the new checks of production [4] [4a] amd [5] of the
3225 	 * Update 5 of XML-1.0
3226 	 */
3227 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 	    (!(((c >= 'a') && (c <= 'z')) ||
3229 	       ((c >= 'A') && (c <= 'Z')) ||
3230 	       (c == '_') || (c == ':') ||
3231 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3232 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3233 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 	       ((c >= 0x370) && (c <= 0x37D)) ||
3235 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3237 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3238 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 	    return(NULL);
3244 	}
3245 	len += l;
3246 	NEXTL(l);
3247 	c = CUR_CHAR(l);
3248 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 	       (((c >= 'a') && (c <= 'z')) ||
3250 	        ((c >= 'A') && (c <= 'Z')) ||
3251 	        ((c >= '0') && (c <= '9')) || /* !start */
3252 	        (c == '_') || (c == ':') ||
3253 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3255 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3256 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 	        ((c >= 0x370) && (c <= 0x37D)) ||
3259 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3261 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3263 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3268 		)) {
3269 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 		count = 0;
3271 		GROW;
3272                 if (ctxt->instate == XML_PARSER_EOF)
3273                     return(NULL);
3274 	    }
3275             if (len <= INT_MAX - l)
3276 	        len += l;
3277 	    NEXTL(l);
3278 	    c = CUR_CHAR(l);
3279 	}
3280     } else {
3281 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282 	    (!IS_LETTER(c) && (c != '_') &&
3283 	     (c != ':'))) {
3284 	    return(NULL);
3285 	}
3286 	len += l;
3287 	NEXTL(l);
3288 	c = CUR_CHAR(l);
3289 
3290 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292 		(c == '.') || (c == '-') ||
3293 		(c == '_') || (c == ':') ||
3294 		(IS_COMBINING(c)) ||
3295 		(IS_EXTENDER(c)))) {
3296 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3297 		count = 0;
3298 		GROW;
3299                 if (ctxt->instate == XML_PARSER_EOF)
3300                     return(NULL);
3301 	    }
3302             if (len <= INT_MAX - l)
3303 	        len += l;
3304 	    NEXTL(l);
3305 	    c = CUR_CHAR(l);
3306 	}
3307     }
3308     if (len > maxLength) {
3309         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310         return(NULL);
3311     }
3312     if (ctxt->input->cur - ctxt->input->base < len) {
3313         /*
3314          * There were a couple of bugs where PERefs lead to to a change
3315          * of the buffer. Check the buffer size to avoid passing an invalid
3316          * pointer to xmlDictLookup.
3317          */
3318         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3319                     "unexpected change of input buffer");
3320         return (NULL);
3321     }
3322     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3325 }
3326 
3327 /**
3328  * xmlParseName:
3329  * @ctxt:  an XML parser context
3330  *
3331  * parse an XML name.
3332  *
3333  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334  *                  CombiningChar | Extender
3335  *
3336  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3337  *
3338  * [6] Names ::= Name (#x20 Name)*
3339  *
3340  * Returns the Name parsed or NULL
3341  */
3342 
3343 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3344 xmlParseName(xmlParserCtxtPtr ctxt) {
3345     const xmlChar *in;
3346     const xmlChar *ret;
3347     size_t count = 0;
3348     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349                        XML_MAX_TEXT_LENGTH :
3350                        XML_MAX_NAME_LENGTH;
3351 
3352     GROW;
3353 
3354 #ifdef DEBUG
3355     nbParseName++;
3356 #endif
3357 
3358     /*
3359      * Accelerator for simple ASCII names
3360      */
3361     in = ctxt->input->cur;
3362     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 	((*in >= 0x41) && (*in <= 0x5A)) ||
3364 	(*in == '_') || (*in == ':')) {
3365 	in++;
3366 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3368 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3369 	       (*in == '_') || (*in == '-') ||
3370 	       (*in == ':') || (*in == '.'))
3371 	    in++;
3372 	if ((*in > 0) && (*in < 0x80)) {
3373 	    count = in - ctxt->input->cur;
3374             if (count > maxLength) {
3375                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376                 return(NULL);
3377             }
3378 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379 	    ctxt->input->cur = in;
3380 	    ctxt->input->col += count;
3381 	    if (ret == NULL)
3382 	        xmlErrMemory(ctxt, NULL);
3383 	    return(ret);
3384 	}
3385     }
3386     /* accelerator for special cases */
3387     return(xmlParseNameComplex(ctxt));
3388 }
3389 
3390 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3391 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3392     int len = 0, l;
3393     int c;
3394     int count = 0;
3395     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3396                     XML_MAX_TEXT_LENGTH :
3397                     XML_MAX_NAME_LENGTH;
3398     size_t startPosition = 0;
3399 
3400 #ifdef DEBUG
3401     nbParseNCNameComplex++;
3402 #endif
3403 
3404     /*
3405      * Handler for more complex cases
3406      */
3407     GROW;
3408     startPosition = CUR_PTR - BASE_PTR;
3409     c = CUR_CHAR(l);
3410     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412 	return(NULL);
3413     }
3414 
3415     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3417 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3418 	    count = 0;
3419 	    GROW;
3420             if (ctxt->instate == XML_PARSER_EOF)
3421                 return(NULL);
3422 	}
3423         if (len <= INT_MAX - l)
3424 	    len += l;
3425 	NEXTL(l);
3426 	c = CUR_CHAR(l);
3427 	if (c == 0) {
3428 	    count = 0;
3429 	    /*
3430 	     * when shrinking to extend the buffer we really need to preserve
3431 	     * the part of the name we already parsed. Hence rolling back
3432 	     * by current length.
3433 	     */
3434 	    ctxt->input->cur -= l;
3435 	    GROW;
3436             if (ctxt->instate == XML_PARSER_EOF)
3437                 return(NULL);
3438 	    ctxt->input->cur += l;
3439 	    c = CUR_CHAR(l);
3440 	}
3441     }
3442     if (len > maxLength) {
3443         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444         return(NULL);
3445     }
3446     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3447 }
3448 
3449 /**
3450  * xmlParseNCName:
3451  * @ctxt:  an XML parser context
3452  * @len:  length of the string parsed
3453  *
3454  * parse an XML name.
3455  *
3456  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3457  *                      CombiningChar | Extender
3458  *
3459  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3460  *
3461  * Returns the Name parsed or NULL
3462  */
3463 
3464 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3465 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3466     const xmlChar *in, *e;
3467     const xmlChar *ret;
3468     size_t count = 0;
3469     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3470                        XML_MAX_TEXT_LENGTH :
3471                        XML_MAX_NAME_LENGTH;
3472 
3473 #ifdef DEBUG
3474     nbParseNCName++;
3475 #endif
3476 
3477     /*
3478      * Accelerator for simple ASCII names
3479      */
3480     in = ctxt->input->cur;
3481     e = ctxt->input->end;
3482     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 	 (*in == '_')) && (in < e)) {
3485 	in++;
3486 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3488 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3489 	        (*in == '_') || (*in == '-') ||
3490 	        (*in == '.')) && (in < e))
3491 	    in++;
3492 	if (in >= e)
3493 	    goto complex;
3494 	if ((*in > 0) && (*in < 0x80)) {
3495 	    count = in - ctxt->input->cur;
3496             if (count > maxLength) {
3497                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498                 return(NULL);
3499             }
3500 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 	    ctxt->input->cur = in;
3502 	    ctxt->input->col += count;
3503 	    if (ret == NULL) {
3504 	        xmlErrMemory(ctxt, NULL);
3505 	    }
3506 	    return(ret);
3507 	}
3508     }
3509 complex:
3510     return(xmlParseNCNameComplex(ctxt));
3511 }
3512 
3513 /**
3514  * xmlParseNameAndCompare:
3515  * @ctxt:  an XML parser context
3516  *
3517  * parse an XML name and compares for match
3518  * (specialized for endtag parsing)
3519  *
3520  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521  * and the name for mismatch
3522  */
3523 
3524 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3525 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526     register const xmlChar *cmp = other;
3527     register const xmlChar *in;
3528     const xmlChar *ret;
3529 
3530     GROW;
3531     if (ctxt->instate == XML_PARSER_EOF)
3532         return(NULL);
3533 
3534     in = ctxt->input->cur;
3535     while (*in != 0 && *in == *cmp) {
3536 	++in;
3537 	++cmp;
3538     }
3539     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540 	/* success */
3541 	ctxt->input->col += in - ctxt->input->cur;
3542 	ctxt->input->cur = in;
3543 	return (const xmlChar*) 1;
3544     }
3545     /* failure (or end of input buffer), check with full function */
3546     ret = xmlParseName (ctxt);
3547     /* strings coming from the dictionary direct compare possible */
3548     if (ret == other) {
3549 	return (const xmlChar*) 1;
3550     }
3551     return ret;
3552 }
3553 
3554 /**
3555  * xmlParseStringName:
3556  * @ctxt:  an XML parser context
3557  * @str:  a pointer to the string pointer (IN/OUT)
3558  *
3559  * parse an XML name.
3560  *
3561  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562  *                  CombiningChar | Extender
3563  *
3564  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565  *
3566  * [6] Names ::= Name (#x20 Name)*
3567  *
3568  * Returns the Name parsed or NULL. The @str pointer
3569  * is updated to the current location in the string.
3570  */
3571 
3572 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3573 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574     xmlChar buf[XML_MAX_NAMELEN + 5];
3575     const xmlChar *cur = *str;
3576     int len = 0, l;
3577     int c;
3578     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3579                     XML_MAX_TEXT_LENGTH :
3580                     XML_MAX_NAME_LENGTH;
3581 
3582 #ifdef DEBUG
3583     nbParseStringName++;
3584 #endif
3585 
3586     c = CUR_SCHAR(cur, l);
3587     if (!xmlIsNameStartChar(ctxt, c)) {
3588 	return(NULL);
3589     }
3590 
3591     COPY_BUF(l,buf,len,c);
3592     cur += l;
3593     c = CUR_SCHAR(cur, l);
3594     while (xmlIsNameChar(ctxt, c)) {
3595 	COPY_BUF(l,buf,len,c);
3596 	cur += l;
3597 	c = CUR_SCHAR(cur, l);
3598 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599 	    /*
3600 	     * Okay someone managed to make a huge name, so he's ready to pay
3601 	     * for the processing speed.
3602 	     */
3603 	    xmlChar *buffer;
3604 	    int max = len * 2;
3605 
3606 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607 	    if (buffer == NULL) {
3608 	        xmlErrMemory(ctxt, NULL);
3609 		return(NULL);
3610 	    }
3611 	    memcpy(buffer, buf, len);
3612 	    while (xmlIsNameChar(ctxt, c)) {
3613 		if (len + 10 > max) {
3614 		    xmlChar *tmp;
3615 
3616 		    max *= 2;
3617 		    tmp = (xmlChar *) xmlRealloc(buffer,
3618 			                            max * sizeof(xmlChar));
3619 		    if (tmp == NULL) {
3620 			xmlErrMemory(ctxt, NULL);
3621 			xmlFree(buffer);
3622 			return(NULL);
3623 		    }
3624 		    buffer = tmp;
3625 		}
3626 		COPY_BUF(l,buffer,len,c);
3627 		cur += l;
3628 		c = CUR_SCHAR(cur, l);
3629                 if (len > maxLength) {
3630                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631                     xmlFree(buffer);
3632                     return(NULL);
3633                 }
3634 	    }
3635 	    buffer[len] = 0;
3636 	    *str = cur;
3637 	    return(buffer);
3638 	}
3639     }
3640     if (len > maxLength) {
3641         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642         return(NULL);
3643     }
3644     *str = cur;
3645     return(xmlStrndup(buf, len));
3646 }
3647 
3648 /**
3649  * xmlParseNmtoken:
3650  * @ctxt:  an XML parser context
3651  *
3652  * parse an XML Nmtoken.
3653  *
3654  * [7] Nmtoken ::= (NameChar)+
3655  *
3656  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3657  *
3658  * Returns the Nmtoken parsed or NULL
3659  */
3660 
3661 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3662 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3663     xmlChar buf[XML_MAX_NAMELEN + 5];
3664     int len = 0, l;
3665     int c;
3666     int count = 0;
3667     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3668                     XML_MAX_TEXT_LENGTH :
3669                     XML_MAX_NAME_LENGTH;
3670 
3671 #ifdef DEBUG
3672     nbParseNmToken++;
3673 #endif
3674 
3675     GROW;
3676     if (ctxt->instate == XML_PARSER_EOF)
3677         return(NULL);
3678     c = CUR_CHAR(l);
3679 
3680     while (xmlIsNameChar(ctxt, c)) {
3681 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3682 	    count = 0;
3683 	    GROW;
3684 	}
3685 	COPY_BUF(l,buf,len,c);
3686 	NEXTL(l);
3687 	c = CUR_CHAR(l);
3688 	if (c == 0) {
3689 	    count = 0;
3690 	    GROW;
3691 	    if (ctxt->instate == XML_PARSER_EOF)
3692 		return(NULL);
3693             c = CUR_CHAR(l);
3694 	}
3695 	if (len >= XML_MAX_NAMELEN) {
3696 	    /*
3697 	     * Okay someone managed to make a huge token, so he's ready to pay
3698 	     * for the processing speed.
3699 	     */
3700 	    xmlChar *buffer;
3701 	    int max = len * 2;
3702 
3703 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704 	    if (buffer == NULL) {
3705 	        xmlErrMemory(ctxt, NULL);
3706 		return(NULL);
3707 	    }
3708 	    memcpy(buffer, buf, len);
3709 	    while (xmlIsNameChar(ctxt, c)) {
3710 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3711 		    count = 0;
3712 		    GROW;
3713                     if (ctxt->instate == XML_PARSER_EOF) {
3714                         xmlFree(buffer);
3715                         return(NULL);
3716                     }
3717 		}
3718 		if (len + 10 > max) {
3719 		    xmlChar *tmp;
3720 
3721 		    max *= 2;
3722 		    tmp = (xmlChar *) xmlRealloc(buffer,
3723 			                            max * sizeof(xmlChar));
3724 		    if (tmp == NULL) {
3725 			xmlErrMemory(ctxt, NULL);
3726 			xmlFree(buffer);
3727 			return(NULL);
3728 		    }
3729 		    buffer = tmp;
3730 		}
3731 		COPY_BUF(l,buffer,len,c);
3732 		NEXTL(l);
3733 		c = CUR_CHAR(l);
3734                 if (len > maxLength) {
3735                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736                     xmlFree(buffer);
3737                     return(NULL);
3738                 }
3739 	    }
3740 	    buffer[len] = 0;
3741 	    return(buffer);
3742 	}
3743     }
3744     if (len == 0)
3745         return(NULL);
3746     if (len > maxLength) {
3747         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748         return(NULL);
3749     }
3750     return(xmlStrndup(buf, len));
3751 }
3752 
3753 /**
3754  * xmlParseEntityValue:
3755  * @ctxt:  an XML parser context
3756  * @orig:  if non-NULL store a copy of the original entity value
3757  *
3758  * parse a value for ENTITY declarations
3759  *
3760  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3762  *
3763  * Returns the EntityValue parsed with reference substituted or NULL
3764  */
3765 
3766 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3767 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3768     xmlChar *buf = NULL;
3769     int len = 0;
3770     int size = XML_PARSER_BUFFER_SIZE;
3771     int c, l;
3772     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3773                     XML_MAX_HUGE_LENGTH :
3774                     XML_MAX_TEXT_LENGTH;
3775     xmlChar stop;
3776     xmlChar *ret = NULL;
3777     const xmlChar *cur = NULL;
3778     xmlParserInputPtr input;
3779 
3780     if (RAW == '"') stop = '"';
3781     else if (RAW == '\'') stop = '\'';
3782     else {
3783 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3784 	return(NULL);
3785     }
3786     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787     if (buf == NULL) {
3788 	xmlErrMemory(ctxt, NULL);
3789 	return(NULL);
3790     }
3791 
3792     /*
3793      * The content of the entity definition is copied in a buffer.
3794      */
3795 
3796     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3797     input = ctxt->input;
3798     GROW;
3799     if (ctxt->instate == XML_PARSER_EOF)
3800         goto error;
3801     NEXT;
3802     c = CUR_CHAR(l);
3803     /*
3804      * NOTE: 4.4.5 Included in Literal
3805      * When a parameter entity reference appears in a literal entity
3806      * value, ... a single or double quote character in the replacement
3807      * text is always treated as a normal data character and will not
3808      * terminate the literal.
3809      * In practice it means we stop the loop only when back at parsing
3810      * the initial entity and the quote is found
3811      */
3812     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3814 	if (len + 5 >= size) {
3815 	    xmlChar *tmp;
3816 
3817 	    size *= 2;
3818 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819 	    if (tmp == NULL) {
3820 		xmlErrMemory(ctxt, NULL);
3821                 goto error;
3822 	    }
3823 	    buf = tmp;
3824 	}
3825 	COPY_BUF(l,buf,len,c);
3826 	NEXTL(l);
3827 
3828 	GROW;
3829 	c = CUR_CHAR(l);
3830 	if (c == 0) {
3831 	    GROW;
3832 	    c = CUR_CHAR(l);
3833 	}
3834 
3835         if (len > maxLength) {
3836             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837                            "entity value too long\n");
3838             goto error;
3839         }
3840     }
3841     buf[len] = 0;
3842     if (ctxt->instate == XML_PARSER_EOF)
3843         goto error;
3844     if (c != stop) {
3845         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3846         goto error;
3847     }
3848     NEXT;
3849 
3850     /*
3851      * Raise problem w.r.t. '&' and '%' being used in non-entities
3852      * reference constructs. Note Charref will be handled in
3853      * xmlStringDecodeEntities()
3854      */
3855     cur = buf;
3856     while (*cur != 0) { /* non input consuming */
3857 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858 	    xmlChar *name;
3859 	    xmlChar tmp = *cur;
3860             int nameOk = 0;
3861 
3862 	    cur++;
3863 	    name = xmlParseStringName(ctxt, &cur);
3864             if (name != NULL) {
3865                 nameOk = 1;
3866                 xmlFree(name);
3867             }
3868             if ((nameOk == 0) || (*cur != ';')) {
3869 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870 	    "EntityValue: '%c' forbidden except for entities references\n",
3871 	                          tmp);
3872                 goto error;
3873 	    }
3874 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875 		(ctxt->inputNr == 1)) {
3876 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3877                 goto error;
3878 	    }
3879 	    if (*cur == 0)
3880 	        break;
3881 	}
3882 	cur++;
3883     }
3884 
3885     /*
3886      * Then PEReference entities are substituted.
3887      *
3888      * NOTE: 4.4.7 Bypassed
3889      * When a general entity reference appears in the EntityValue in
3890      * an entity declaration, it is bypassed and left as is.
3891      * so XML_SUBSTITUTE_REF is not set here.
3892      */
3893     ++ctxt->depth;
3894     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3895                                   0, 0, 0);
3896     --ctxt->depth;
3897     if (orig != NULL) {
3898         *orig = buf;
3899         buf = NULL;
3900     }
3901 
3902 error:
3903     if (buf != NULL)
3904         xmlFree(buf);
3905     return(ret);
3906 }
3907 
3908 /**
3909  * xmlParseAttValueComplex:
3910  * @ctxt:  an XML parser context
3911  * @len:   the resulting attribute len
3912  * @normalize:  whether to apply the inner normalization
3913  *
3914  * parse a value for an attribute, this is the fallback function
3915  * of xmlParseAttValue() when the attribute parsing requires handling
3916  * of non-ASCII characters, or normalization compaction.
3917  *
3918  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3919  */
3920 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3921 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3922     xmlChar limit = 0;
3923     xmlChar *buf = NULL;
3924     xmlChar *rep = NULL;
3925     size_t len = 0;
3926     size_t buf_size = 0;
3927     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3928                        XML_MAX_HUGE_LENGTH :
3929                        XML_MAX_TEXT_LENGTH;
3930     int c, l, in_space = 0;
3931     xmlChar *current = NULL;
3932     xmlEntityPtr ent;
3933 
3934     if (NXT(0) == '"') {
3935 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3936 	limit = '"';
3937         NEXT;
3938     } else if (NXT(0) == '\'') {
3939 	limit = '\'';
3940 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3941         NEXT;
3942     } else {
3943 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3944 	return(NULL);
3945     }
3946 
3947     /*
3948      * allocate a translation buffer.
3949      */
3950     buf_size = XML_PARSER_BUFFER_SIZE;
3951     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952     if (buf == NULL) goto mem_error;
3953 
3954     /*
3955      * OK loop until we reach one of the ending char or a size limit.
3956      */
3957     c = CUR_CHAR(l);
3958     while (((NXT(0) != limit) && /* checked */
3959             (IS_CHAR(c)) && (c != '<')) &&
3960             (ctxt->instate != XML_PARSER_EOF)) {
3961 	if (c == '&') {
3962 	    in_space = 0;
3963 	    if (NXT(1) == '#') {
3964 		int val = xmlParseCharRef(ctxt);
3965 
3966 		if (val == '&') {
3967 		    if (ctxt->replaceEntities) {
3968 			if (len + 10 > buf_size) {
3969 			    growBuffer(buf, 10);
3970 			}
3971 			buf[len++] = '&';
3972 		    } else {
3973 			/*
3974 			 * The reparsing will be done in xmlStringGetNodeList()
3975 			 * called by the attribute() function in SAX.c
3976 			 */
3977 			if (len + 10 > buf_size) {
3978 			    growBuffer(buf, 10);
3979 			}
3980 			buf[len++] = '&';
3981 			buf[len++] = '#';
3982 			buf[len++] = '3';
3983 			buf[len++] = '8';
3984 			buf[len++] = ';';
3985 		    }
3986 		} else if (val != 0) {
3987 		    if (len + 10 > buf_size) {
3988 			growBuffer(buf, 10);
3989 		    }
3990 		    len += xmlCopyChar(0, &buf[len], val);
3991 		}
3992 	    } else {
3993 		ent = xmlParseEntityRef(ctxt);
3994 		ctxt->nbentities++;
3995 		if (ent != NULL)
3996 		    ctxt->nbentities += ent->owner;
3997 		if ((ent != NULL) &&
3998 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3999 		    if (len + 10 > buf_size) {
4000 			growBuffer(buf, 10);
4001 		    }
4002 		    if ((ctxt->replaceEntities == 0) &&
4003 		        (ent->content[0] == '&')) {
4004 			buf[len++] = '&';
4005 			buf[len++] = '#';
4006 			buf[len++] = '3';
4007 			buf[len++] = '8';
4008 			buf[len++] = ';';
4009 		    } else {
4010 			buf[len++] = ent->content[0];
4011 		    }
4012 		} else if ((ent != NULL) &&
4013 		           (ctxt->replaceEntities != 0)) {
4014 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4015 			++ctxt->depth;
4016 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4017 						      XML_SUBSTITUTE_REF,
4018 						      0, 0, 0);
4019 			--ctxt->depth;
4020 			if (rep != NULL) {
4021 			    current = rep;
4022 			    while (*current != 0) { /* non input consuming */
4023                                 if ((*current == 0xD) || (*current == 0xA) ||
4024                                     (*current == 0x9)) {
4025                                     buf[len++] = 0x20;
4026                                     current++;
4027                                 } else
4028                                     buf[len++] = *current++;
4029 				if (len + 10 > buf_size) {
4030 				    growBuffer(buf, 10);
4031 				}
4032 			    }
4033 			    xmlFree(rep);
4034 			    rep = NULL;
4035 			}
4036 		    } else {
4037 			if (len + 10 > buf_size) {
4038 			    growBuffer(buf, 10);
4039 			}
4040 			if (ent->content != NULL)
4041 			    buf[len++] = ent->content[0];
4042 		    }
4043 		} else if (ent != NULL) {
4044 		    int i = xmlStrlen(ent->name);
4045 		    const xmlChar *cur = ent->name;
4046 
4047 		    /*
4048 		     * This may look absurd but is needed to detect
4049 		     * entities problems
4050 		     */
4051 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052 			(ent->content != NULL) && (ent->checked == 0)) {
4053 			unsigned long oldnbent = ctxt->nbentities, diff;
4054 
4055 			++ctxt->depth;
4056 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4057 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4058 			--ctxt->depth;
4059 
4060                         diff = ctxt->nbentities - oldnbent + 1;
4061                         if (diff > INT_MAX / 2)
4062                             diff = INT_MAX / 2;
4063                         ent->checked = diff * 2;
4064 			if (rep != NULL) {
4065 			    if (xmlStrchr(rep, '<'))
4066 			        ent->checked |= 1;
4067 			    xmlFree(rep);
4068 			    rep = NULL;
4069 			} else {
4070                             ent->content[0] = 0;
4071                         }
4072 		    }
4073 
4074 		    /*
4075 		     * Just output the reference
4076 		     */
4077 		    buf[len++] = '&';
4078 		    while (len + i + 10 > buf_size) {
4079 			growBuffer(buf, i + 10);
4080 		    }
4081 		    for (;i > 0;i--)
4082 			buf[len++] = *cur++;
4083 		    buf[len++] = ';';
4084 		}
4085 	    }
4086 	} else {
4087 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4088 	        if ((len != 0) || (!normalize)) {
4089 		    if ((!normalize) || (!in_space)) {
4090 			COPY_BUF(l,buf,len,0x20);
4091 			while (len + 10 > buf_size) {
4092 			    growBuffer(buf, 10);
4093 			}
4094 		    }
4095 		    in_space = 1;
4096 		}
4097 	    } else {
4098 	        in_space = 0;
4099 		COPY_BUF(l,buf,len,c);
4100 		if (len + 10 > buf_size) {
4101 		    growBuffer(buf, 10);
4102 		}
4103 	    }
4104 	    NEXTL(l);
4105 	}
4106 	GROW;
4107 	c = CUR_CHAR(l);
4108         if (len > maxLength) {
4109             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110                            "AttValue length too long\n");
4111             goto mem_error;
4112         }
4113     }
4114     if (ctxt->instate == XML_PARSER_EOF)
4115         goto error;
4116 
4117     if ((in_space) && (normalize)) {
4118         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4119     }
4120     buf[len] = 0;
4121     if (RAW == '<') {
4122 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4123     } else if (RAW != limit) {
4124 	if ((c != 0) && (!IS_CHAR(c))) {
4125 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126 			   "invalid character in attribute value\n");
4127 	} else {
4128 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129 			   "AttValue: ' expected\n");
4130         }
4131     } else
4132 	NEXT;
4133 
4134     if (attlen != NULL) *attlen = (int) len;
4135     return(buf);
4136 
4137 mem_error:
4138     xmlErrMemory(ctxt, NULL);
4139 error:
4140     if (buf != NULL)
4141         xmlFree(buf);
4142     if (rep != NULL)
4143         xmlFree(rep);
4144     return(NULL);
4145 }
4146 
4147 /**
4148  * xmlParseAttValue:
4149  * @ctxt:  an XML parser context
4150  *
4151  * parse a value for an attribute
4152  * Note: the parser won't do substitution of entities here, this
4153  * will be handled later in xmlStringGetNodeList
4154  *
4155  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4156  *                   "'" ([^<&'] | Reference)* "'"
4157  *
4158  * 3.3.3 Attribute-Value Normalization:
4159  * Before the value of an attribute is passed to the application or
4160  * checked for validity, the XML processor must normalize it as follows:
4161  * - a character reference is processed by appending the referenced
4162  *   character to the attribute value
4163  * - an entity reference is processed by recursively processing the
4164  *   replacement text of the entity
4165  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4166  *   appending #x20 to the normalized value, except that only a single
4167  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4168  *   parsed entity or the literal entity value of an internal parsed entity
4169  * - other characters are processed by appending them to the normalized value
4170  * If the declared value is not CDATA, then the XML processor must further
4171  * process the normalized attribute value by discarding any leading and
4172  * trailing space (#x20) characters, and by replacing sequences of space
4173  * (#x20) characters by a single space (#x20) character.
4174  * All attributes for which no declaration has been read should be treated
4175  * by a non-validating parser as if declared CDATA.
4176  *
4177  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4178  */
4179 
4180 
4181 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4182 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4183     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4185 }
4186 
4187 /**
4188  * xmlParseSystemLiteral:
4189  * @ctxt:  an XML parser context
4190  *
4191  * parse an XML Literal
4192  *
4193  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4194  *
4195  * Returns the SystemLiteral parsed or NULL
4196  */
4197 
4198 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4199 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4200     xmlChar *buf = NULL;
4201     int len = 0;
4202     int size = XML_PARSER_BUFFER_SIZE;
4203     int cur, l;
4204     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4205                     XML_MAX_TEXT_LENGTH :
4206                     XML_MAX_NAME_LENGTH;
4207     xmlChar stop;
4208     int state = ctxt->instate;
4209     int count = 0;
4210 
4211     SHRINK;
4212     if (RAW == '"') {
4213         NEXT;
4214 	stop = '"';
4215     } else if (RAW == '\'') {
4216         NEXT;
4217 	stop = '\'';
4218     } else {
4219 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4220 	return(NULL);
4221     }
4222 
4223     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224     if (buf == NULL) {
4225         xmlErrMemory(ctxt, NULL);
4226 	return(NULL);
4227     }
4228     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4229     cur = CUR_CHAR(l);
4230     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4231 	if (len + 5 >= size) {
4232 	    xmlChar *tmp;
4233 
4234 	    size *= 2;
4235 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236 	    if (tmp == NULL) {
4237 	        xmlFree(buf);
4238 		xmlErrMemory(ctxt, NULL);
4239 		ctxt->instate = (xmlParserInputState) state;
4240 		return(NULL);
4241 	    }
4242 	    buf = tmp;
4243 	}
4244 	count++;
4245 	if (count > 50) {
4246 	    SHRINK;
4247 	    GROW;
4248 	    count = 0;
4249             if (ctxt->instate == XML_PARSER_EOF) {
4250 	        xmlFree(buf);
4251 		return(NULL);
4252             }
4253 	}
4254 	COPY_BUF(l,buf,len,cur);
4255 	NEXTL(l);
4256 	cur = CUR_CHAR(l);
4257 	if (cur == 0) {
4258 	    GROW;
4259 	    SHRINK;
4260 	    cur = CUR_CHAR(l);
4261 	}
4262         if (len > maxLength) {
4263             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264             xmlFree(buf);
4265             ctxt->instate = (xmlParserInputState) state;
4266             return(NULL);
4267         }
4268     }
4269     buf[len] = 0;
4270     ctxt->instate = (xmlParserInputState) state;
4271     if (!IS_CHAR(cur)) {
4272 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4273     } else {
4274 	NEXT;
4275     }
4276     return(buf);
4277 }
4278 
4279 /**
4280  * xmlParsePubidLiteral:
4281  * @ctxt:  an XML parser context
4282  *
4283  * parse an XML public literal
4284  *
4285  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4286  *
4287  * Returns the PubidLiteral parsed or NULL.
4288  */
4289 
4290 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4291 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4292     xmlChar *buf = NULL;
4293     int len = 0;
4294     int size = XML_PARSER_BUFFER_SIZE;
4295     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4296                     XML_MAX_TEXT_LENGTH :
4297                     XML_MAX_NAME_LENGTH;
4298     xmlChar cur;
4299     xmlChar stop;
4300     int count = 0;
4301     xmlParserInputState oldstate = ctxt->instate;
4302 
4303     SHRINK;
4304     if (RAW == '"') {
4305         NEXT;
4306 	stop = '"';
4307     } else if (RAW == '\'') {
4308         NEXT;
4309 	stop = '\'';
4310     } else {
4311 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4312 	return(NULL);
4313     }
4314     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315     if (buf == NULL) {
4316 	xmlErrMemory(ctxt, NULL);
4317 	return(NULL);
4318     }
4319     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4320     cur = CUR;
4321     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4322 	if (len + 1 >= size) {
4323 	    xmlChar *tmp;
4324 
4325 	    size *= 2;
4326 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327 	    if (tmp == NULL) {
4328 		xmlErrMemory(ctxt, NULL);
4329 		xmlFree(buf);
4330 		return(NULL);
4331 	    }
4332 	    buf = tmp;
4333 	}
4334 	buf[len++] = cur;
4335 	count++;
4336 	if (count > 50) {
4337 	    SHRINK;
4338 	    GROW;
4339 	    count = 0;
4340             if (ctxt->instate == XML_PARSER_EOF) {
4341 		xmlFree(buf);
4342 		return(NULL);
4343             }
4344 	}
4345 	NEXT;
4346 	cur = CUR;
4347 	if (cur == 0) {
4348 	    GROW;
4349 	    SHRINK;
4350 	    cur = CUR;
4351 	}
4352         if (len > maxLength) {
4353             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354             xmlFree(buf);
4355             return(NULL);
4356         }
4357     }
4358     buf[len] = 0;
4359     if (cur != stop) {
4360 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4361     } else {
4362 	NEXT;
4363     }
4364     ctxt->instate = oldstate;
4365     return(buf);
4366 }
4367 
4368 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4369 
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value: a non-zero entry (the byte itself) means the
 * fast scanning loop may step over that byte; a zero entry stops it.
 * Zero entries are the control characters other than 0x9, the
 * characters '&', '<' and ']', and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4407 
4408 /**
4409  * xmlParseCharData:
4410  * @ctxt:  an XML parser context
4411  * @cdata:  int indicating whether we are within a CDATA section
4412  *
4413  * parse a CharData section.
4414  * if we are within a CDATA section ']]>' marks an end of section.
4415  *
4416  * The right angle bracket (>) may be represented using the string "&gt;",
4417  * and must, for compatibility, be escaped using "&gt;" or a character
4418  * reference when it appears in the string "]]>" in content, when that
4419  * string is not marking the end of a CDATA section.
4420  *
4421  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4422  */
4423 
4424 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4425 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4426     const xmlChar *in;
4427     int nbchar = 0;
4428     int line = ctxt->input->line;
4429     int col = ctxt->input->col;
4430     int ccol;
4431 
4432     SHRINK;
4433     GROW;
4434     /*
4435      * Accelerated common case where input don't need to be
4436      * modified before passing it to the handler.
4437      */
4438     if (!cdata) {
4439 	in = ctxt->input->cur;
4440 	do {
4441 get_more_space:
4442 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4443 	    if (*in == 0xA) {
4444 		do {
4445 		    ctxt->input->line++; ctxt->input->col = 1;
4446 		    in++;
4447 		} while (*in == 0xA);
4448 		goto get_more_space;
4449 	    }
4450 	    if (*in == '<') {
4451 		nbchar = in - ctxt->input->cur;
4452 		if (nbchar > 0) {
4453 		    const xmlChar *tmp = ctxt->input->cur;
4454 		    ctxt->input->cur = in;
4455 
4456 		    if ((ctxt->sax != NULL) &&
4457 		        (ctxt->sax->ignorableWhitespace !=
4458 		         ctxt->sax->characters)) {
4459 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4460 			    if (ctxt->sax->ignorableWhitespace != NULL)
4461 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4462 						       tmp, nbchar);
4463 			} else {
4464 			    if (ctxt->sax->characters != NULL)
4465 				ctxt->sax->characters(ctxt->userData,
4466 						      tmp, nbchar);
4467 			    if (*ctxt->space == -1)
4468 			        *ctxt->space = -2;
4469 			}
4470 		    } else if ((ctxt->sax != NULL) &&
4471 		               (ctxt->sax->characters != NULL)) {
4472 			ctxt->sax->characters(ctxt->userData,
4473 					      tmp, nbchar);
4474 		    }
4475 		}
4476 		return;
4477 	    }
4478 
4479 get_more:
4480             ccol = ctxt->input->col;
4481 	    while (test_char_data[*in]) {
4482 		in++;
4483 		ccol++;
4484 	    }
4485 	    ctxt->input->col = ccol;
4486 	    if (*in == 0xA) {
4487 		do {
4488 		    ctxt->input->line++; ctxt->input->col = 1;
4489 		    in++;
4490 		} while (*in == 0xA);
4491 		goto get_more;
4492 	    }
4493 	    if (*in == ']') {
4494 		if ((in[1] == ']') && (in[2] == '>')) {
4495 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4496 		    ctxt->input->cur = in + 1;
4497 		    return;
4498 		}
4499 		in++;
4500 		ctxt->input->col++;
4501 		goto get_more;
4502 	    }
4503 	    nbchar = in - ctxt->input->cur;
4504 	    if (nbchar > 0) {
4505 		if ((ctxt->sax != NULL) &&
4506 		    (ctxt->sax->ignorableWhitespace !=
4507 		     ctxt->sax->characters) &&
4508 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4509 		    const xmlChar *tmp = ctxt->input->cur;
4510 		    ctxt->input->cur = in;
4511 
4512 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4513 		        if (ctxt->sax->ignorableWhitespace != NULL)
4514 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4515 							   tmp, nbchar);
4516 		    } else {
4517 		        if (ctxt->sax->characters != NULL)
4518 			    ctxt->sax->characters(ctxt->userData,
4519 						  tmp, nbchar);
4520 			if (*ctxt->space == -1)
4521 			    *ctxt->space = -2;
4522 		    }
4523                     line = ctxt->input->line;
4524                     col = ctxt->input->col;
4525 		} else if (ctxt->sax != NULL) {
4526 		    if (ctxt->sax->characters != NULL)
4527 			ctxt->sax->characters(ctxt->userData,
4528 					      ctxt->input->cur, nbchar);
4529                     line = ctxt->input->line;
4530                     col = ctxt->input->col;
4531 		}
4532                 /* something really bad happened in the SAX callback */
4533                 if (ctxt->instate != XML_PARSER_CONTENT)
4534                     return;
4535 	    }
4536 	    ctxt->input->cur = in;
4537 	    if (*in == 0xD) {
4538 		in++;
4539 		if (*in == 0xA) {
4540 		    ctxt->input->cur = in;
4541 		    in++;
4542 		    ctxt->input->line++; ctxt->input->col = 1;
4543 		    continue; /* while */
4544 		}
4545 		in--;
4546 	    }
4547 	    if (*in == '<') {
4548 		return;
4549 	    }
4550 	    if (*in == '&') {
4551 		return;
4552 	    }
4553 	    SHRINK;
4554 	    GROW;
4555             if (ctxt->instate == XML_PARSER_EOF)
4556 		return;
4557 	    in = ctxt->input->cur;
4558 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4559 	nbchar = 0;
4560     }
4561     ctxt->input->line = line;
4562     ctxt->input->col = col;
4563     xmlParseCharDataComplex(ctxt, cdata);
4564 }
4565 
4566 /**
4567  * xmlParseCharDataComplex:
4568  * @ctxt:  an XML parser context
4569  * @cdata:  int indicating whether we are within a CDATA section
4570  *
4571  * parse a CharData section.this is the fallback function
4572  * of xmlParseCharData() when the parsing requires handling
4573  * of non-ASCII characters.
4574  */
4575 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4576 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4577     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4578     int nbchar = 0;
4579     int cur, l;
4580     int count = 0;
4581 
4582     SHRINK;
4583     GROW;
4584     cur = CUR_CHAR(l);
4585     while ((cur != '<') && /* checked */
4586            (cur != '&') &&
4587 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4588 	if ((cur == ']') && (NXT(1) == ']') &&
4589 	    (NXT(2) == '>')) {
4590 	    if (cdata) break;
4591 	    else {
4592 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4593 	    }
4594 	}
4595 	COPY_BUF(l,buf,nbchar,cur);
4596 	/* move current position before possible calling of ctxt->sax->characters */
4597 	NEXTL(l);
4598 	cur = CUR_CHAR(l);
4599 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600 	    buf[nbchar] = 0;
4601 
4602 	    /*
4603 	     * OK the segment is to be consumed as chars.
4604 	     */
4605 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4607 		    if (ctxt->sax->ignorableWhitespace != NULL)
4608 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4609 			                               buf, nbchar);
4610 		} else {
4611 		    if (ctxt->sax->characters != NULL)
4612 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613 		    if ((ctxt->sax->characters !=
4614 		         ctxt->sax->ignorableWhitespace) &&
4615 			(*ctxt->space == -1))
4616 			*ctxt->space = -2;
4617 		}
4618 	    }
4619 	    nbchar = 0;
4620             /* something really bad happened in the SAX callback */
4621             if (ctxt->instate != XML_PARSER_CONTENT)
4622                 return;
4623 	}
4624 	count++;
4625 	if (count > 50) {
4626 	    SHRINK;
4627 	    GROW;
4628 	    count = 0;
4629             if (ctxt->instate == XML_PARSER_EOF)
4630 		return;
4631 	}
4632     }
4633     if (nbchar != 0) {
4634         buf[nbchar] = 0;
4635 	/*
4636 	 * OK the segment is to be consumed as chars.
4637 	 */
4638 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4640 		if (ctxt->sax->ignorableWhitespace != NULL)
4641 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642 	    } else {
4643 		if (ctxt->sax->characters != NULL)
4644 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646 		    (*ctxt->space == -1))
4647 		    *ctxt->space = -2;
4648 	    }
4649 	}
4650     }
4651     if ((cur != 0) && (!IS_CHAR(cur))) {
4652 	/* Generate the error and skip the offending character */
4653         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654                           "PCDATA invalid Char value %d\n",
4655 	                  cur);
4656 	NEXTL(l);
4657     }
4658 }
4659 
4660 /**
4661  * xmlParseExternalID:
4662  * @ctxt:  an XML parser context
4663  * @publicID:  a xmlChar** receiving PubidLiteral
4664  * @strict: indicate whether we should restrict parsing to only
4665  *          production [75], see NOTE below
4666  *
4667  * Parse an External ID or a Public ID
4668  *
4669  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4670  *       'PUBLIC' S PubidLiteral S SystemLiteral
4671  *
4672  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4673  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4674  *
4675  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4676  *
4677  * Returns the function returns SystemLiteral and in the second
4678  *                case publicID receives PubidLiteral, is strict is off
4679  *                it is possible to return NULL and have publicID set.
4680  */
4681 
4682 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4683 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4684     xmlChar *URI = NULL;
4685 
4686     SHRINK;
4687 
4688     *publicID = NULL;
4689     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690         SKIP(6);
4691 	if (SKIP_BLANKS == 0) {
4692 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693 	                   "Space required after 'SYSTEM'\n");
4694 	}
4695 	URI = xmlParseSystemLiteral(ctxt);
4696 	if (URI == NULL) {
4697 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4698         }
4699     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700         SKIP(6);
4701 	if (SKIP_BLANKS == 0) {
4702 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 		    "Space required after 'PUBLIC'\n");
4704 	}
4705 	*publicID = xmlParsePubidLiteral(ctxt);
4706 	if (*publicID == NULL) {
4707 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4708 	}
4709 	if (strict) {
4710 	    /*
4711 	     * We don't handle [83] so "S SystemLiteral" is required.
4712 	     */
4713 	    if (SKIP_BLANKS == 0) {
4714 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4715 			"Space required after the Public Identifier\n");
4716 	    }
4717 	} else {
4718 	    /*
4719 	     * We handle [83] so we return immediately, if
4720 	     * "S SystemLiteral" is not detected. We skip blanks if no
4721              * system literal was found, but this is harmless since we must
4722              * be at the end of a NotationDecl.
4723 	     */
4724 	    if (SKIP_BLANKS == 0) return(NULL);
4725 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4726 	}
4727 	URI = xmlParseSystemLiteral(ctxt);
4728 	if (URI == NULL) {
4729 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4730         }
4731     }
4732     return(URI);
4733 }
4734 
4735 /**
4736  * xmlParseCommentComplex:
4737  * @ctxt:  an XML parser context
4738  * @buf:  the already parsed part of the buffer
4739  * @len:  number of bytes in the buffer
4740  * @size:  allocated size of the buffer
4741  *
4742  * Skip an XML (SGML) comment <!-- .... -->
4743  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4744  *  must not occur within comments. "
4745  * This is the slow routine in case the accelerator for ascii didn't work
4746  *
4747  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4748  */
4749 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4750 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4751                        size_t len, size_t size) {
4752     int q, ql;
4753     int r, rl;
4754     int cur, l;
4755     size_t count = 0;
4756     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4757                        XML_MAX_HUGE_LENGTH :
4758                        XML_MAX_TEXT_LENGTH;
4759     int inputid;
4760 
4761     inputid = ctxt->input->id;
4762 
4763     if (buf == NULL) {
4764         len = 0;
4765 	size = XML_PARSER_BUFFER_SIZE;
4766 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4767 	if (buf == NULL) {
4768 	    xmlErrMemory(ctxt, NULL);
4769 	    return;
4770 	}
4771     }
4772     GROW;	/* Assure there's enough input data */
4773     q = CUR_CHAR(ql);
4774     if (q == 0)
4775         goto not_terminated;
4776     if (!IS_CHAR(q)) {
4777         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4778                           "xmlParseComment: invalid xmlChar value %d\n",
4779 	                  q);
4780 	xmlFree (buf);
4781 	return;
4782     }
4783     NEXTL(ql);
4784     r = CUR_CHAR(rl);
4785     if (r == 0)
4786         goto not_terminated;
4787     if (!IS_CHAR(r)) {
4788         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789                           "xmlParseComment: invalid xmlChar value %d\n",
4790 	                  q);
4791 	xmlFree (buf);
4792 	return;
4793     }
4794     NEXTL(rl);
4795     cur = CUR_CHAR(l);
4796     if (cur == 0)
4797         goto not_terminated;
4798     while (IS_CHAR(cur) && /* checked */
4799            ((cur != '>') ||
4800 	    (r != '-') || (q != '-'))) {
4801 	if ((r == '-') && (q == '-')) {
4802 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4803 	}
4804 	if (len + 5 >= size) {
4805 	    xmlChar *new_buf;
4806             size_t new_size;
4807 
4808 	    new_size = size * 2;
4809 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4810 	    if (new_buf == NULL) {
4811 		xmlFree (buf);
4812 		xmlErrMemory(ctxt, NULL);
4813 		return;
4814 	    }
4815 	    buf = new_buf;
4816             size = new_size;
4817 	}
4818 	COPY_BUF(ql,buf,len,q);
4819 	q = r;
4820 	ql = rl;
4821 	r = cur;
4822 	rl = l;
4823 
4824 	count++;
4825 	if (count > 50) {
4826 	    SHRINK;
4827 	    GROW;
4828 	    count = 0;
4829             if (ctxt->instate == XML_PARSER_EOF) {
4830 		xmlFree(buf);
4831 		return;
4832             }
4833 	}
4834 	NEXTL(l);
4835 	cur = CUR_CHAR(l);
4836 	if (cur == 0) {
4837 	    SHRINK;
4838 	    GROW;
4839 	    cur = CUR_CHAR(l);
4840 	}
4841 
4842         if (len > maxLength) {
4843             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4844                          "Comment too big found", NULL);
4845             xmlFree (buf);
4846             return;
4847         }
4848     }
4849     buf[len] = 0;
4850     if (cur == 0) {
4851 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4852 	                     "Comment not terminated \n<!--%.50s\n", buf);
4853     } else if (!IS_CHAR(cur)) {
4854         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4855                           "xmlParseComment: invalid xmlChar value %d\n",
4856 	                  cur);
4857     } else {
4858 	if (inputid != ctxt->input->id) {
4859 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4860 		           "Comment doesn't start and stop in the same"
4861                            " entity\n");
4862 	}
4863         NEXT;
4864 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4865 	    (!ctxt->disableSAX))
4866 	    ctxt->sax->comment(ctxt->userData, buf);
4867     }
4868     xmlFree(buf);
4869     return;
4870 not_terminated:
4871     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872 			 "Comment not terminated\n", NULL);
4873     xmlFree(buf);
4874     return;
4875 }
4876 
4877 /**
4878  * xmlParseComment:
4879  * @ctxt:  an XML parser context
4880  *
4881  * Skip an XML (SGML) comment <!-- .... -->
4882  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4883  *  must not occur within comments. "
4884  *
4885  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4886  */
4887 void
xmlParseComment(xmlParserCtxtPtr ctxt)4888 xmlParseComment(xmlParserCtxtPtr ctxt) {
4889     xmlChar *buf = NULL;
4890     size_t size = XML_PARSER_BUFFER_SIZE;
4891     size_t len = 0;
4892     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4893                        XML_MAX_HUGE_LENGTH :
4894                        XML_MAX_TEXT_LENGTH;
4895     xmlParserInputState state;
4896     const xmlChar *in;
4897     size_t nbchar = 0;
4898     int ccol;
4899     int inputid;
4900 
4901     /*
4902      * Check that there is a comment right here.
4903      */
4904     if ((RAW != '<') || (NXT(1) != '!') ||
4905         (NXT(2) != '-') || (NXT(3) != '-')) return;
4906     state = ctxt->instate;
4907     ctxt->instate = XML_PARSER_COMMENT;
4908     inputid = ctxt->input->id;
4909     SKIP(4);
4910     SHRINK;
4911     GROW;
4912 
4913     /*
4914      * Accelerated common case where input don't need to be
4915      * modified before passing it to the handler.
4916      */
4917     in = ctxt->input->cur;
4918     do {
4919 	if (*in == 0xA) {
4920 	    do {
4921 		ctxt->input->line++; ctxt->input->col = 1;
4922 		in++;
4923 	    } while (*in == 0xA);
4924 	}
4925 get_more:
4926         ccol = ctxt->input->col;
4927 	while (((*in > '-') && (*in <= 0x7F)) ||
4928 	       ((*in >= 0x20) && (*in < '-')) ||
4929 	       (*in == 0x09)) {
4930 		    in++;
4931 		    ccol++;
4932 	}
4933 	ctxt->input->col = ccol;
4934 	if (*in == 0xA) {
4935 	    do {
4936 		ctxt->input->line++; ctxt->input->col = 1;
4937 		in++;
4938 	    } while (*in == 0xA);
4939 	    goto get_more;
4940 	}
4941 	nbchar = in - ctxt->input->cur;
4942 	/*
4943 	 * save current set of data
4944 	 */
4945 	if (nbchar > 0) {
4946 	    if ((ctxt->sax != NULL) &&
4947 		(ctxt->sax->comment != NULL)) {
4948 		if (buf == NULL) {
4949 		    if ((*in == '-') && (in[1] == '-'))
4950 		        size = nbchar + 1;
4951 		    else
4952 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4953 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4954 		    if (buf == NULL) {
4955 		        xmlErrMemory(ctxt, NULL);
4956 			ctxt->instate = state;
4957 			return;
4958 		    }
4959 		    len = 0;
4960 		} else if (len + nbchar + 1 >= size) {
4961 		    xmlChar *new_buf;
4962 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4963 		    new_buf = (xmlChar *) xmlRealloc(buf,
4964 		                                     size * sizeof(xmlChar));
4965 		    if (new_buf == NULL) {
4966 		        xmlFree (buf);
4967 			xmlErrMemory(ctxt, NULL);
4968 			ctxt->instate = state;
4969 			return;
4970 		    }
4971 		    buf = new_buf;
4972 		}
4973 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4974 		len += nbchar;
4975 		buf[len] = 0;
4976 	    }
4977 	}
4978         if (len > maxLength) {
4979             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4980                          "Comment too big found", NULL);
4981             xmlFree (buf);
4982             return;
4983         }
4984 	ctxt->input->cur = in;
4985 	if (*in == 0xA) {
4986 	    in++;
4987 	    ctxt->input->line++; ctxt->input->col = 1;
4988 	}
4989 	if (*in == 0xD) {
4990 	    in++;
4991 	    if (*in == 0xA) {
4992 		ctxt->input->cur = in;
4993 		in++;
4994 		ctxt->input->line++; ctxt->input->col = 1;
4995 		goto get_more;
4996 	    }
4997 	    in--;
4998 	}
4999 	SHRINK;
5000 	GROW;
5001         if (ctxt->instate == XML_PARSER_EOF) {
5002             xmlFree(buf);
5003             return;
5004         }
5005 	in = ctxt->input->cur;
5006 	if (*in == '-') {
5007 	    if (in[1] == '-') {
5008 	        if (in[2] == '>') {
5009 		    if (ctxt->input->id != inputid) {
5010 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5011 			               "comment doesn't start and stop in the"
5012                                        " same entity\n");
5013 		    }
5014 		    SKIP(3);
5015 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5016 		        (!ctxt->disableSAX)) {
5017 			if (buf != NULL)
5018 			    ctxt->sax->comment(ctxt->userData, buf);
5019 			else
5020 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5021 		    }
5022 		    if (buf != NULL)
5023 		        xmlFree(buf);
5024 		    if (ctxt->instate != XML_PARSER_EOF)
5025 			ctxt->instate = state;
5026 		    return;
5027 		}
5028 		if (buf != NULL) {
5029 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5030 		                      "Double hyphen within comment: "
5031                                       "<!--%.50s\n",
5032 				      buf);
5033 		} else
5034 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5035 		                      "Double hyphen within comment\n", NULL);
5036                 if (ctxt->instate == XML_PARSER_EOF) {
5037                     xmlFree(buf);
5038                     return;
5039                 }
5040 		in++;
5041 		ctxt->input->col++;
5042 	    }
5043 	    in++;
5044 	    ctxt->input->col++;
5045 	    goto get_more;
5046 	}
5047     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5048     xmlParseCommentComplex(ctxt, buf, len, size);
5049     ctxt->instate = state;
5050     return;
5051 }
5052 
5053 
5054 /**
5055  * xmlParsePITarget:
5056  * @ctxt:  an XML parser context
5057  *
5058  * parse the name of a PI
5059  *
5060  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5061  *
5062  * Returns the PITarget name or NULL
5063  */
5064 
5065 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5066 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5067     const xmlChar *name;
5068 
5069     name = xmlParseName(ctxt);
5070     if ((name != NULL) &&
5071         ((name[0] == 'x') || (name[0] == 'X')) &&
5072         ((name[1] == 'm') || (name[1] == 'M')) &&
5073         ((name[2] == 'l') || (name[2] == 'L'))) {
5074 	int i;
5075 	if ((name[0] == 'x') && (name[1] == 'm') &&
5076 	    (name[2] == 'l') && (name[3] == 0)) {
5077 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 		 "XML declaration allowed only at the start of the document\n");
5079 	    return(name);
5080 	} else if (name[3] == 0) {
5081 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5082 	    return(name);
5083 	}
5084 	for (i = 0;;i++) {
5085 	    if (xmlW3CPIs[i] == NULL) break;
5086 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5087 	        return(name);
5088 	}
5089 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5090 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5091 		      NULL, NULL);
5092     }
5093     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5094 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5095 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5096     }
5097     return(name);
5098 }
5099 
5100 #ifdef LIBXML_CATALOG_ENABLED
5101 /**
5102  * xmlParseCatalogPI:
5103  * @ctxt:  an XML parser context
5104  * @catalog:  the PI value string
5105  *
5106  * parse an XML Catalog Processing Instruction.
5107  *
5108  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5109  *
5110  * Occurs only if allowed by the user and if happening in the Misc
5111  * part of the document before any doctype information
5112  * This will add the given catalog to the parsing context in order
5113  * to be used if there is a resolution need further down in the document
5114  */
5115 
5116 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5117 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5118     xmlChar *URL = NULL;
5119     const xmlChar *tmp, *base;
5120     xmlChar marker;
5121 
5122     tmp = catalog;
5123     while (IS_BLANK_CH(*tmp)) tmp++;
5124     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5125 	goto error;
5126     tmp += 7;
5127     while (IS_BLANK_CH(*tmp)) tmp++;
5128     if (*tmp != '=') {
5129 	return;
5130     }
5131     tmp++;
5132     while (IS_BLANK_CH(*tmp)) tmp++;
5133     marker = *tmp;
5134     if ((marker != '\'') && (marker != '"'))
5135 	goto error;
5136     tmp++;
5137     base = tmp;
5138     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5139     if (*tmp == 0)
5140 	goto error;
5141     URL = xmlStrndup(base, tmp - base);
5142     tmp++;
5143     while (IS_BLANK_CH(*tmp)) tmp++;
5144     if (*tmp != 0)
5145 	goto error;
5146 
5147     if (URL != NULL) {
5148 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5149 	xmlFree(URL);
5150     }
5151     return;
5152 
5153 error:
5154     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5155 	          "Catalog PI syntax error: %s\n",
5156 		  catalog, NULL);
5157     if (URL != NULL)
5158 	xmlFree(URL);
5159 }
5160 #endif
5161 
5162 /**
5163  * xmlParsePI:
5164  * @ctxt:  an XML parser context
5165  *
5166  * parse an XML Processing Instruction.
5167  *
5168  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5169  *
5170  * The processing is transferred to SAX once parsed.
5171  */
5172 
5173 void
xmlParsePI(xmlParserCtxtPtr ctxt)5174 xmlParsePI(xmlParserCtxtPtr ctxt) {
5175     xmlChar *buf = NULL;
5176     size_t len = 0;
5177     size_t size = XML_PARSER_BUFFER_SIZE;
5178     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5179                        XML_MAX_HUGE_LENGTH :
5180                        XML_MAX_TEXT_LENGTH;
5181     int cur, l;
5182     const xmlChar *target;
5183     xmlParserInputState state;
5184     int count = 0;
5185 
5186     if ((RAW == '<') && (NXT(1) == '?')) {
5187 	int inputid = ctxt->input->id;
5188 	state = ctxt->instate;
5189         ctxt->instate = XML_PARSER_PI;
5190 	/*
5191 	 * this is a Processing Instruction.
5192 	 */
5193 	SKIP(2);
5194 	SHRINK;
5195 
5196 	/*
5197 	 * Parse the target name and check for special support like
5198 	 * namespace.
5199 	 */
5200         target = xmlParsePITarget(ctxt);
5201 	if (target != NULL) {
5202 	    if ((RAW == '?') && (NXT(1) == '>')) {
5203 		if (inputid != ctxt->input->id) {
5204 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5205 	                           "PI declaration doesn't start and stop in"
5206                                    " the same entity\n");
5207 		}
5208 		SKIP(2);
5209 
5210 		/*
5211 		 * SAX: PI detected.
5212 		 */
5213 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5214 		    (ctxt->sax->processingInstruction != NULL))
5215 		    ctxt->sax->processingInstruction(ctxt->userData,
5216 		                                     target, NULL);
5217 		if (ctxt->instate != XML_PARSER_EOF)
5218 		    ctxt->instate = state;
5219 		return;
5220 	    }
5221 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5222 	    if (buf == NULL) {
5223 		xmlErrMemory(ctxt, NULL);
5224 		ctxt->instate = state;
5225 		return;
5226 	    }
5227 	    if (SKIP_BLANKS == 0) {
5228 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5229 			  "ParsePI: PI %s space expected\n", target);
5230 	    }
5231 	    cur = CUR_CHAR(l);
5232 	    while (IS_CHAR(cur) && /* checked */
5233 		   ((cur != '?') || (NXT(1) != '>'))) {
5234 		if (len + 5 >= size) {
5235 		    xmlChar *tmp;
5236                     size_t new_size = size * 2;
5237 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5238 		    if (tmp == NULL) {
5239 			xmlErrMemory(ctxt, NULL);
5240 			xmlFree(buf);
5241 			ctxt->instate = state;
5242 			return;
5243 		    }
5244 		    buf = tmp;
5245                     size = new_size;
5246 		}
5247 		count++;
5248 		if (count > 50) {
5249 		    SHRINK;
5250 		    GROW;
5251                     if (ctxt->instate == XML_PARSER_EOF) {
5252                         xmlFree(buf);
5253                         return;
5254                     }
5255 		    count = 0;
5256 		}
5257 		COPY_BUF(l,buf,len,cur);
5258 		NEXTL(l);
5259 		cur = CUR_CHAR(l);
5260 		if (cur == 0) {
5261 		    SHRINK;
5262 		    GROW;
5263 		    cur = CUR_CHAR(l);
5264 		}
5265                 if (len > maxLength) {
5266                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5267                                       "PI %s too big found", target);
5268                     xmlFree(buf);
5269                     ctxt->instate = state;
5270                     return;
5271                 }
5272 	    }
5273 	    buf[len] = 0;
5274 	    if (cur != '?') {
5275 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5276 		      "ParsePI: PI %s never end ...\n", target);
5277 	    } else {
5278 		if (inputid != ctxt->input->id) {
5279 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5280 	                           "PI declaration doesn't start and stop in"
5281                                    " the same entity\n");
5282 		}
5283 		SKIP(2);
5284 
5285 #ifdef LIBXML_CATALOG_ENABLED
5286 		if (((state == XML_PARSER_MISC) ||
5287 	             (state == XML_PARSER_START)) &&
5288 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5289 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5290 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5291 			(allow == XML_CATA_ALLOW_ALL))
5292 			xmlParseCatalogPI(ctxt, buf);
5293 		}
5294 #endif
5295 
5296 
5297 		/*
5298 		 * SAX: PI detected.
5299 		 */
5300 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5301 		    (ctxt->sax->processingInstruction != NULL))
5302 		    ctxt->sax->processingInstruction(ctxt->userData,
5303 		                                     target, buf);
5304 	    }
5305 	    xmlFree(buf);
5306 	} else {
5307 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5308 	}
5309 	if (ctxt->instate != XML_PARSER_EOF)
5310 	    ctxt->instate = state;
5311     }
5312 }
5313 
5314 /**
5315  * xmlParseNotationDecl:
5316  * @ctxt:  an XML parser context
5317  *
5318  * parse a notation declaration
5319  *
5320  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5321  *
5322  * Hence there is actually 3 choices:
5323  *     'PUBLIC' S PubidLiteral
5324  *     'PUBLIC' S PubidLiteral S SystemLiteral
5325  * and 'SYSTEM' S SystemLiteral
5326  *
5327  * See the NOTE on xmlParseExternalID().
5328  */
5329 
5330 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5331 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5332     const xmlChar *name;
5333     xmlChar *Pubid;
5334     xmlChar *Systemid;
5335 
5336     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5337 	int inputid = ctxt->input->id;
5338 	SHRINK;
5339 	SKIP(10);
5340 	if (SKIP_BLANKS == 0) {
5341 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342 			   "Space required after '<!NOTATION'\n");
5343 	    return;
5344 	}
5345 
5346         name = xmlParseName(ctxt);
5347 	if (name == NULL) {
5348 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5349 	    return;
5350 	}
5351 	if (xmlStrchr(name, ':') != NULL) {
5352 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5353 		     "colons are forbidden from notation names '%s'\n",
5354 		     name, NULL, NULL);
5355 	}
5356 	if (SKIP_BLANKS == 0) {
5357 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358 		     "Space required after the NOTATION name'\n");
5359 	    return;
5360 	}
5361 
5362 	/*
5363 	 * Parse the IDs.
5364 	 */
5365 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5366 	SKIP_BLANKS;
5367 
5368 	if (RAW == '>') {
5369 	    if (inputid != ctxt->input->id) {
5370 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5371 	                       "Notation declaration doesn't start and stop"
5372                                " in the same entity\n");
5373 	    }
5374 	    NEXT;
5375 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376 		(ctxt->sax->notationDecl != NULL))
5377 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378 	} else {
5379 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380 	}
5381 	if (Systemid != NULL) xmlFree(Systemid);
5382 	if (Pubid != NULL) xmlFree(Pubid);
5383     }
5384 }
5385 
5386 /**
5387  * xmlParseEntityDecl:
5388  * @ctxt:  an XML parser context
5389  *
5390  * parse <!ENTITY declarations
5391  *
5392  * [70] EntityDecl ::= GEDecl | PEDecl
5393  *
5394  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5395  *
5396  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5397  *
5398  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5399  *
5400  * [74] PEDef ::= EntityValue | ExternalID
5401  *
5402  * [76] NDataDecl ::= S 'NDATA' S Name
5403  *
5404  * [ VC: Notation Declared ]
5405  * The Name must match the declared name of a notation.
5406  */
5407 
5408 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5409 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5410     const xmlChar *name = NULL;
5411     xmlChar *value = NULL;
5412     xmlChar *URI = NULL, *literal = NULL;
5413     const xmlChar *ndata = NULL;
5414     int isParameter = 0;
5415     xmlChar *orig = NULL;
5416 
5417     /* GROW; done in the caller */
5418     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5419 	int inputid = ctxt->input->id;
5420 	SHRINK;
5421 	SKIP(8);
5422 	if (SKIP_BLANKS == 0) {
5423 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5424 			   "Space required after '<!ENTITY'\n");
5425 	}
5426 
5427 	if (RAW == '%') {
5428 	    NEXT;
5429 	    if (SKIP_BLANKS == 0) {
5430 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431 			       "Space required after '%%'\n");
5432 	    }
5433 	    isParameter = 1;
5434 	}
5435 
5436         name = xmlParseName(ctxt);
5437 	if (name == NULL) {
5438 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5439 	                   "xmlParseEntityDecl: no name\n");
5440             return;
5441 	}
5442 	if (xmlStrchr(name, ':') != NULL) {
5443 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5444 		     "colons are forbidden from entities names '%s'\n",
5445 		     name, NULL, NULL);
5446 	}
5447 	if (SKIP_BLANKS == 0) {
5448 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 			   "Space required after the entity name\n");
5450 	}
5451 
5452 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5453 	/*
5454 	 * handle the various case of definitions...
5455 	 */
5456 	if (isParameter) {
5457 	    if ((RAW == '"') || (RAW == '\'')) {
5458 	        value = xmlParseEntityValue(ctxt, &orig);
5459 		if (value) {
5460 		    if ((ctxt->sax != NULL) &&
5461 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462 			ctxt->sax->entityDecl(ctxt->userData, name,
5463 		                    XML_INTERNAL_PARAMETER_ENTITY,
5464 				    NULL, NULL, value);
5465 		}
5466 	    } else {
5467 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5468 		if ((URI == NULL) && (literal == NULL)) {
5469 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5470 		}
5471 		if (URI) {
5472 		    xmlURIPtr uri;
5473 
5474 		    uri = xmlParseURI((const char *) URI);
5475 		    if (uri == NULL) {
5476 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477 				     "Invalid URI: %s\n", URI);
5478 			/*
5479 			 * This really ought to be a well formedness error
5480 			 * but the XML Core WG decided otherwise c.f. issue
5481 			 * E26 of the XML erratas.
5482 			 */
5483 		    } else {
5484 			if (uri->fragment != NULL) {
5485 			    /*
5486 			     * Okay this is foolish to block those but not
5487 			     * invalid URIs.
5488 			     */
5489 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5490 			} else {
5491 			    if ((ctxt->sax != NULL) &&
5492 				(!ctxt->disableSAX) &&
5493 				(ctxt->sax->entityDecl != NULL))
5494 				ctxt->sax->entityDecl(ctxt->userData, name,
5495 					    XML_EXTERNAL_PARAMETER_ENTITY,
5496 					    literal, URI, NULL);
5497 			}
5498 			xmlFreeURI(uri);
5499 		    }
5500 		}
5501 	    }
5502 	} else {
5503 	    if ((RAW == '"') || (RAW == '\'')) {
5504 	        value = xmlParseEntityValue(ctxt, &orig);
5505 		if ((ctxt->sax != NULL) &&
5506 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507 		    ctxt->sax->entityDecl(ctxt->userData, name,
5508 				XML_INTERNAL_GENERAL_ENTITY,
5509 				NULL, NULL, value);
5510 		/*
5511 		 * For expat compatibility in SAX mode.
5512 		 */
5513 		if ((ctxt->myDoc == NULL) ||
5514 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515 		    if (ctxt->myDoc == NULL) {
5516 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5517 			if (ctxt->myDoc == NULL) {
5518 			    xmlErrMemory(ctxt, "New Doc failed");
5519 			    return;
5520 			}
5521 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5522 		    }
5523 		    if (ctxt->myDoc->intSubset == NULL)
5524 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 					    BAD_CAST "fake", NULL, NULL);
5526 
5527 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528 			              NULL, NULL, value);
5529 		}
5530 	    } else {
5531 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5532 		if ((URI == NULL) && (literal == NULL)) {
5533 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5534 		}
5535 		if (URI) {
5536 		    xmlURIPtr uri;
5537 
5538 		    uri = xmlParseURI((const char *)URI);
5539 		    if (uri == NULL) {
5540 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541 				     "Invalid URI: %s\n", URI);
5542 			/*
5543 			 * This really ought to be a well formedness error
5544 			 * but the XML Core WG decided otherwise c.f. issue
5545 			 * E26 of the XML erratas.
5546 			 */
5547 		    } else {
5548 			if (uri->fragment != NULL) {
5549 			    /*
5550 			     * Okay this is foolish to block those but not
5551 			     * invalid URIs.
5552 			     */
5553 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5554 			}
5555 			xmlFreeURI(uri);
5556 		    }
5557 		}
5558 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5559 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 				   "Space required before 'NDATA'\n");
5561 		}
5562 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5563 		    SKIP(5);
5564 		    if (SKIP_BLANKS == 0) {
5565 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 				       "Space required after 'NDATA'\n");
5567 		    }
5568 		    ndata = xmlParseName(ctxt);
5569 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5570 		        (ctxt->sax->unparsedEntityDecl != NULL))
5571 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5572 				    literal, URI, ndata);
5573 		} else {
5574 		    if ((ctxt->sax != NULL) &&
5575 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5576 			ctxt->sax->entityDecl(ctxt->userData, name,
5577 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5578 				    literal, URI, NULL);
5579 		    /*
5580 		     * For expat compatibility in SAX mode.
5581 		     * assuming the entity replacement was asked for
5582 		     */
5583 		    if ((ctxt->replaceEntities != 0) &&
5584 			((ctxt->myDoc == NULL) ||
5585 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5586 			if (ctxt->myDoc == NULL) {
5587 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588 			    if (ctxt->myDoc == NULL) {
5589 			        xmlErrMemory(ctxt, "New Doc failed");
5590 				return;
5591 			    }
5592 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593 			}
5594 
5595 			if (ctxt->myDoc->intSubset == NULL)
5596 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597 						BAD_CAST "fake", NULL, NULL);
5598 			xmlSAX2EntityDecl(ctxt, name,
5599 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5600 				          literal, URI, NULL);
5601 		    }
5602 		}
5603 	    }
5604 	}
5605 	if (ctxt->instate == XML_PARSER_EOF)
5606 	    goto done;
5607 	SKIP_BLANKS;
5608 	if (RAW != '>') {
5609 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5610 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5611 	    xmlHaltParser(ctxt);
5612 	} else {
5613 	    if (inputid != ctxt->input->id) {
5614 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5615 	                       "Entity declaration doesn't start and stop in"
5616                                " the same entity\n");
5617 	    }
5618 	    NEXT;
5619 	}
5620 	if (orig != NULL) {
5621 	    /*
5622 	     * Ugly mechanism to save the raw entity value.
5623 	     */
5624 	    xmlEntityPtr cur = NULL;
5625 
5626 	    if (isParameter) {
5627 	        if ((ctxt->sax != NULL) &&
5628 		    (ctxt->sax->getParameterEntity != NULL))
5629 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630 	    } else {
5631 	        if ((ctxt->sax != NULL) &&
5632 		    (ctxt->sax->getEntity != NULL))
5633 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5634 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5635 		    cur = xmlSAX2GetEntity(ctxt, name);
5636 		}
5637 	    }
5638             if ((cur != NULL) && (cur->orig == NULL)) {
5639 		cur->orig = orig;
5640                 orig = NULL;
5641 	    }
5642 	}
5643 
5644 done:
5645 	if (value != NULL) xmlFree(value);
5646 	if (URI != NULL) xmlFree(URI);
5647 	if (literal != NULL) xmlFree(literal);
5648         if (orig != NULL) xmlFree(orig);
5649     }
5650 }
5651 
5652 /**
5653  * xmlParseDefaultDecl:
5654  * @ctxt:  an XML parser context
5655  * @value:  Receive a possible fixed default value for the attribute
5656  *
5657  * Parse an attribute default declaration
5658  *
5659  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5660  *
5661  * [ VC: Required Attribute ]
5662  * if the default declaration is the keyword #REQUIRED, then the
5663  * attribute must be specified for all elements of the type in the
5664  * attribute-list declaration.
5665  *
5666  * [ VC: Attribute Default Legal ]
5667  * The declared default value must meet the lexical constraints of
5668  * the declared attribute type c.f. xmlValidateAttributeDecl()
5669  *
5670  * [ VC: Fixed Attribute Default ]
5671  * if an attribute has a default value declared with the #FIXED
5672  * keyword, instances of that attribute must match the default value.
5673  *
5674  * [ WFC: No < in Attribute Values ]
5675  * handled in xmlParseAttValue()
5676  *
5677  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5678  *          or XML_ATTRIBUTE_FIXED.
5679  */
5680 
5681 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5682 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683     int val;
5684     xmlChar *ret;
5685 
5686     *value = NULL;
5687     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5688 	SKIP(9);
5689 	return(XML_ATTRIBUTE_REQUIRED);
5690     }
5691     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5692 	SKIP(8);
5693 	return(XML_ATTRIBUTE_IMPLIED);
5694     }
5695     val = XML_ATTRIBUTE_NONE;
5696     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5697 	SKIP(6);
5698 	val = XML_ATTRIBUTE_FIXED;
5699 	if (SKIP_BLANKS == 0) {
5700 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 			   "Space required after '#FIXED'\n");
5702 	}
5703     }
5704     ret = xmlParseAttValue(ctxt);
5705     ctxt->instate = XML_PARSER_DTD;
5706     if (ret == NULL) {
5707 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5708 		       "Attribute default value declaration error\n");
5709     } else
5710         *value = ret;
5711     return(val);
5712 }
5713 
5714 /**
5715  * xmlParseNotationType:
5716  * @ctxt:  an XML parser context
5717  *
5718  * parse an Notation attribute type.
5719  *
5720  * Note: the leading 'NOTATION' S part has already being parsed...
5721  *
5722  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5723  *
5724  * [ VC: Notation Attributes ]
5725  * Values of this type must match one of the notation names included
5726  * in the declaration; all notation names in the declaration must be declared.
5727  *
5728  * Returns: the notation attribute tree built while parsing
5729  */
5730 
5731 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5732 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5733     const xmlChar *name;
5734     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735 
5736     if (RAW != '(') {
5737 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738 	return(NULL);
5739     }
5740     SHRINK;
5741     do {
5742         NEXT;
5743 	SKIP_BLANKS;
5744         name = xmlParseName(ctxt);
5745 	if (name == NULL) {
5746 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 			   "Name expected in NOTATION declaration\n");
5748             xmlFreeEnumeration(ret);
5749 	    return(NULL);
5750 	}
5751 	tmp = ret;
5752 	while (tmp != NULL) {
5753 	    if (xmlStrEqual(name, tmp->name)) {
5754 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5755 	  "standalone: attribute notation value token %s duplicated\n",
5756 				 name, NULL);
5757 		if (!xmlDictOwns(ctxt->dict, name))
5758 		    xmlFree((xmlChar *) name);
5759 		break;
5760 	    }
5761 	    tmp = tmp->next;
5762 	}
5763 	if (tmp == NULL) {
5764 	    cur = xmlCreateEnumeration(name);
5765 	    if (cur == NULL) {
5766                 xmlFreeEnumeration(ret);
5767                 return(NULL);
5768             }
5769 	    if (last == NULL) ret = last = cur;
5770 	    else {
5771 		last->next = cur;
5772 		last = cur;
5773 	    }
5774 	}
5775 	SKIP_BLANKS;
5776     } while (RAW == '|');
5777     if (RAW != ')') {
5778 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5779         xmlFreeEnumeration(ret);
5780 	return(NULL);
5781     }
5782     NEXT;
5783     return(ret);
5784 }
5785 
5786 /**
5787  * xmlParseEnumerationType:
5788  * @ctxt:  an XML parser context
5789  *
5790  * parse an Enumeration attribute type.
5791  *
5792  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5793  *
5794  * [ VC: Enumeration ]
5795  * Values of this type must match one of the Nmtoken tokens in
5796  * the declaration
5797  *
5798  * Returns: the enumeration attribute tree built while parsing
5799  */
5800 
5801 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5802 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5803     xmlChar *name;
5804     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5805 
5806     if (RAW != '(') {
5807 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808 	return(NULL);
5809     }
5810     SHRINK;
5811     do {
5812         NEXT;
5813 	SKIP_BLANKS;
5814         name = xmlParseNmtoken(ctxt);
5815 	if (name == NULL) {
5816 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5817 	    return(ret);
5818 	}
5819 	tmp = ret;
5820 	while (tmp != NULL) {
5821 	    if (xmlStrEqual(name, tmp->name)) {
5822 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5823 	  "standalone: attribute enumeration value token %s duplicated\n",
5824 				 name, NULL);
5825 		if (!xmlDictOwns(ctxt->dict, name))
5826 		    xmlFree(name);
5827 		break;
5828 	    }
5829 	    tmp = tmp->next;
5830 	}
5831 	if (tmp == NULL) {
5832 	    cur = xmlCreateEnumeration(name);
5833 	    if (!xmlDictOwns(ctxt->dict, name))
5834 		xmlFree(name);
5835 	    if (cur == NULL) {
5836                 xmlFreeEnumeration(ret);
5837                 return(NULL);
5838             }
5839 	    if (last == NULL) ret = last = cur;
5840 	    else {
5841 		last->next = cur;
5842 		last = cur;
5843 	    }
5844 	}
5845 	SKIP_BLANKS;
5846     } while (RAW == '|');
5847     if (RAW != ')') {
5848 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5849 	return(ret);
5850     }
5851     NEXT;
5852     return(ret);
5853 }
5854 
5855 /**
5856  * xmlParseEnumeratedType:
5857  * @ctxt:  an XML parser context
5858  * @tree:  the enumeration tree built while parsing
5859  *
5860  * parse an Enumerated attribute type.
5861  *
5862  * [57] EnumeratedType ::= NotationType | Enumeration
5863  *
5864  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5865  *
5866  *
5867  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5868  */
5869 
5870 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5871 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5872     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5873 	SKIP(8);
5874 	if (SKIP_BLANKS == 0) {
5875 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5876 			   "Space required after 'NOTATION'\n");
5877 	    return(0);
5878 	}
5879 	*tree = xmlParseNotationType(ctxt);
5880 	if (*tree == NULL) return(0);
5881 	return(XML_ATTRIBUTE_NOTATION);
5882     }
5883     *tree = xmlParseEnumerationType(ctxt);
5884     if (*tree == NULL) return(0);
5885     return(XML_ATTRIBUTE_ENUMERATION);
5886 }
5887 
5888 /**
5889  * xmlParseAttributeType:
5890  * @ctxt:  an XML parser context
5891  * @tree:  the enumeration tree built while parsing
5892  *
5893  * parse the Attribute list def for an element
5894  *
5895  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5896  *
5897  * [55] StringType ::= 'CDATA'
5898  *
5899  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5901  *
5902  * Validity constraints for attribute values syntax are checked in
5903  * xmlValidateAttributeValue()
5904  *
5905  * [ VC: ID ]
5906  * Values of type ID must match the Name production. A name must not
5907  * appear more than once in an XML document as a value of this type;
5908  * i.e., ID values must uniquely identify the elements which bear them.
5909  *
5910  * [ VC: One ID per Element Type ]
5911  * No element type may have more than one ID attribute specified.
5912  *
5913  * [ VC: ID Attribute Default ]
5914  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5915  *
5916  * [ VC: IDREF ]
5917  * Values of type IDREF must match the Name production, and values
5918  * of type IDREFS must match Names; each IDREF Name must match the value
5919  * of an ID attribute on some element in the XML document; i.e. IDREF
5920  * values must match the value of some ID attribute.
5921  *
5922  * [ VC: Entity Name ]
5923  * Values of type ENTITY must match the Name production, values
5924  * of type ENTITIES must match Names; each Entity Name must match the
5925  * name of an unparsed entity declared in the DTD.
5926  *
5927  * [ VC: Name Token ]
5928  * Values of type NMTOKEN must match the Nmtoken production; values
5929  * of type NMTOKENS must match Nmtokens.
5930  *
5931  * Returns the attribute type
5932  */
5933 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5934 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935     SHRINK;
5936     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5937 	SKIP(5);
5938 	return(XML_ATTRIBUTE_CDATA);
5939      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5940 	SKIP(6);
5941 	return(XML_ATTRIBUTE_IDREFS);
5942      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5943 	SKIP(5);
5944 	return(XML_ATTRIBUTE_IDREF);
5945      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946         SKIP(2);
5947 	return(XML_ATTRIBUTE_ID);
5948      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5949 	SKIP(6);
5950 	return(XML_ATTRIBUTE_ENTITY);
5951      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5952 	SKIP(8);
5953 	return(XML_ATTRIBUTE_ENTITIES);
5954      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5955 	SKIP(8);
5956 	return(XML_ATTRIBUTE_NMTOKENS);
5957      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5958 	SKIP(7);
5959 	return(XML_ATTRIBUTE_NMTOKEN);
5960      }
5961      return(xmlParseEnumeratedType(ctxt, tree));
5962 }
5963 
5964 /**
5965  * xmlParseAttributeListDecl:
5966  * @ctxt:  an XML parser context
5967  *
5968  * : parse the Attribute list def for an element
5969  *
5970  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5971  *
5972  * [53] AttDef ::= S Name S AttType S DefaultDecl
5973  *
5974  */
5975 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5976 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5977     const xmlChar *elemName;
5978     const xmlChar *attrName;
5979     xmlEnumerationPtr tree;
5980 
5981     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5982 	int inputid = ctxt->input->id;
5983 
5984 	SKIP(9);
5985 	if (SKIP_BLANKS == 0) {
5986 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5987 		                 "Space required after '<!ATTLIST'\n");
5988 	}
5989         elemName = xmlParseName(ctxt);
5990 	if (elemName == NULL) {
5991 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992 			   "ATTLIST: no name for Element\n");
5993 	    return;
5994 	}
5995 	SKIP_BLANKS;
5996 	GROW;
5997 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5998 	    int type;
5999 	    int def;
6000 	    xmlChar *defaultValue = NULL;
6001 
6002 	    GROW;
6003             tree = NULL;
6004 	    attrName = xmlParseName(ctxt);
6005 	    if (attrName == NULL) {
6006 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007 			       "ATTLIST: no name for Attribute\n");
6008 		break;
6009 	    }
6010 	    GROW;
6011 	    if (SKIP_BLANKS == 0) {
6012 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013 		        "Space required after the attribute name\n");
6014 		break;
6015 	    }
6016 
6017 	    type = xmlParseAttributeType(ctxt, &tree);
6018 	    if (type <= 0) {
6019 	        break;
6020 	    }
6021 
6022 	    GROW;
6023 	    if (SKIP_BLANKS == 0) {
6024 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025 			       "Space required after the attribute type\n");
6026 	        if (tree != NULL)
6027 		    xmlFreeEnumeration(tree);
6028 		break;
6029 	    }
6030 
6031 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032 	    if (def <= 0) {
6033                 if (defaultValue != NULL)
6034 		    xmlFree(defaultValue);
6035 	        if (tree != NULL)
6036 		    xmlFreeEnumeration(tree);
6037 	        break;
6038 	    }
6039 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6041 
6042 	    GROW;
6043             if (RAW != '>') {
6044 		if (SKIP_BLANKS == 0) {
6045 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6046 			"Space required after the attribute default value\n");
6047 		    if (defaultValue != NULL)
6048 			xmlFree(defaultValue);
6049 		    if (tree != NULL)
6050 			xmlFreeEnumeration(tree);
6051 		    break;
6052 		}
6053 	    }
6054 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055 		(ctxt->sax->attributeDecl != NULL))
6056 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057 	                        type, def, defaultValue, tree);
6058 	    else if (tree != NULL)
6059 		xmlFreeEnumeration(tree);
6060 
6061 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6062 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6063 		(def != XML_ATTRIBUTE_REQUIRED)) {
6064 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6065 	    }
6066 	    if (ctxt->sax2) {
6067 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6068 	    }
6069 	    if (defaultValue != NULL)
6070 	        xmlFree(defaultValue);
6071 	    GROW;
6072 	}
6073 	if (RAW == '>') {
6074 	    if (inputid != ctxt->input->id) {
6075 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6076                                "Attribute list declaration doesn't start and"
6077                                " stop in the same entity\n");
6078 	    }
6079 	    NEXT;
6080 	}
6081     }
6082 }
6083 
6084 /**
6085  * xmlParseElementMixedContentDecl:
6086  * @ctxt:  an XML parser context
6087  * @inputchk:  the input used for the current entity, needed for boundary checks
6088  *
6089  * parse the declaration for a Mixed Element content
6090  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6091  *
6092  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093  *                '(' S? '#PCDATA' S? ')'
6094  *
6095  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6096  *
6097  * [ VC: No Duplicate Types ]
6098  * The same name must not appear more than once in a single
6099  * mixed-content declaration.
6100  *
6101  * returns: the list of the xmlElementContentPtr describing the element choices
6102  */
6103 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6104 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6105     xmlElementContentPtr ret = NULL, cur = NULL, n;
6106     const xmlChar *elem = NULL;
6107 
6108     GROW;
6109     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6110 	SKIP(7);
6111 	SKIP_BLANKS;
6112 	SHRINK;
6113 	if (RAW == ')') {
6114 	    if (ctxt->input->id != inputchk) {
6115 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6116                                "Element content declaration doesn't start and"
6117                                " stop in the same entity\n");
6118 	    }
6119 	    NEXT;
6120 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121 	    if (ret == NULL)
6122 	        return(NULL);
6123 	    if (RAW == '*') {
6124 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 		NEXT;
6126 	    }
6127 	    return(ret);
6128 	}
6129 	if ((RAW == '(') || (RAW == '|')) {
6130 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131 	    if (ret == NULL) return(NULL);
6132 	}
6133 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6134 	    NEXT;
6135 	    if (elem == NULL) {
6136 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137 		if (ret == NULL) {
6138 		    xmlFreeDocElementContent(ctxt->myDoc, cur);
6139                     return(NULL);
6140                 }
6141 		ret->c1 = cur;
6142 		if (cur != NULL)
6143 		    cur->parent = ret;
6144 		cur = ret;
6145 	    } else {
6146 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147 		if (n == NULL) {
6148 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6149                     return(NULL);
6150                 }
6151 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6152 		if (n->c1 != NULL)
6153 		    n->c1->parent = n;
6154 	        cur->c2 = n;
6155 		if (n != NULL)
6156 		    n->parent = cur;
6157 		cur = n;
6158 	    }
6159 	    SKIP_BLANKS;
6160 	    elem = xmlParseName(ctxt);
6161 	    if (elem == NULL) {
6162 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6163 			"xmlParseElementMixedContentDecl : Name expected\n");
6164 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6165 		return(NULL);
6166 	    }
6167 	    SKIP_BLANKS;
6168 	    GROW;
6169 	}
6170 	if ((RAW == ')') && (NXT(1) == '*')) {
6171 	    if (elem != NULL) {
6172 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6173 		                               XML_ELEMENT_CONTENT_ELEMENT);
6174 		if (cur->c2 != NULL)
6175 		    cur->c2->parent = cur;
6176             }
6177             if (ret != NULL)
6178                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 	    if (ctxt->input->id != inputchk) {
6180 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181                                "Element content declaration doesn't start and"
6182                                " stop in the same entity\n");
6183 	    }
6184 	    SKIP(2);
6185 	} else {
6186 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6187 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6188 	    return(NULL);
6189 	}
6190 
6191     } else {
6192 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6193     }
6194     return(ret);
6195 }
6196 
6197 /**
6198  * xmlParseElementChildrenContentDeclPriv:
6199  * @ctxt:  an XML parser context
6200  * @inputchk:  the input used for the current entity, needed for boundary checks
6201  * @depth: the level of recursion
6202  *
6203  * parse the declaration for a Mixed Element content
6204  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6205  *
6206  *
6207  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6208  *
6209  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6210  *
6211  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6212  *
6213  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6214  *
6215  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216  * TODO Parameter-entity replacement text must be properly nested
6217  *	with parenthesized groups. That is to say, if either of the
6218  *	opening or closing parentheses in a choice, seq, or Mixed
6219  *	construct is contained in the replacement text for a parameter
6220  *	entity, both must be contained in the same replacement text. For
6221  *	interoperability, if a parameter-entity reference appears in a
6222  *	choice, seq, or Mixed construct, its replacement text should not
6223  *	be empty, and neither the first nor last non-blank character of
6224  *	the replacement text should be a connector (| or ,).
6225  *
6226  * Returns the tree of xmlElementContentPtr describing the element
6227  *          hierarchy.
6228  */
6229 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6230 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6231                                        int depth) {
6232     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6233     const xmlChar *elem;
6234     xmlChar type = 0;
6235 
6236     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6237         (depth >  2048)) {
6238         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6239 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6240                           depth);
6241 	return(NULL);
6242     }
6243     SKIP_BLANKS;
6244     GROW;
6245     if (RAW == '(') {
6246 	int inputid = ctxt->input->id;
6247 
6248         /* Recurse on first child */
6249 	NEXT;
6250 	SKIP_BLANKS;
6251         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6252                                                            depth + 1);
6253         if (cur == NULL)
6254             return(NULL);
6255 	SKIP_BLANKS;
6256 	GROW;
6257     } else {
6258 	elem = xmlParseName(ctxt);
6259 	if (elem == NULL) {
6260 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6261 	    return(NULL);
6262 	}
6263         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6264 	if (cur == NULL) {
6265 	    xmlErrMemory(ctxt, NULL);
6266 	    return(NULL);
6267 	}
6268 	GROW;
6269 	if (RAW == '?') {
6270 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6271 	    NEXT;
6272 	} else if (RAW == '*') {
6273 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6274 	    NEXT;
6275 	} else if (RAW == '+') {
6276 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6277 	    NEXT;
6278 	} else {
6279 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6280 	}
6281 	GROW;
6282     }
6283     SKIP_BLANKS;
6284     SHRINK;
6285     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6286         /*
6287 	 * Each loop we parse one separator and one element.
6288 	 */
6289         if (RAW == ',') {
6290 	    if (type == 0) type = CUR;
6291 
6292 	    /*
6293 	     * Detect "Name | Name , Name" error
6294 	     */
6295 	    else if (type != CUR) {
6296 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6297 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6298 		                  type);
6299 		if ((last != NULL) && (last != ret))
6300 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6301 		if (ret != NULL)
6302 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6303 		return(NULL);
6304 	    }
6305 	    NEXT;
6306 
6307 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6308 	    if (op == NULL) {
6309 		if ((last != NULL) && (last != ret))
6310 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6311 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6312 		return(NULL);
6313 	    }
6314 	    if (last == NULL) {
6315 		op->c1 = ret;
6316 		if (ret != NULL)
6317 		    ret->parent = op;
6318 		ret = cur = op;
6319 	    } else {
6320 	        cur->c2 = op;
6321 		if (op != NULL)
6322 		    op->parent = cur;
6323 		op->c1 = last;
6324 		if (last != NULL)
6325 		    last->parent = op;
6326 		cur =op;
6327 		last = NULL;
6328 	    }
6329 	} else if (RAW == '|') {
6330 	    if (type == 0) type = CUR;
6331 
6332 	    /*
6333 	     * Detect "Name , Name | Name" error
6334 	     */
6335 	    else if (type != CUR) {
6336 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6337 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6338 				  type);
6339 		if ((last != NULL) && (last != ret))
6340 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6341 		if (ret != NULL)
6342 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 		return(NULL);
6344 	    }
6345 	    NEXT;
6346 
6347 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6348 	    if (op == NULL) {
6349 		if ((last != NULL) && (last != ret))
6350 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6351 		if (ret != NULL)
6352 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6353 		return(NULL);
6354 	    }
6355 	    if (last == NULL) {
6356 		op->c1 = ret;
6357 		if (ret != NULL)
6358 		    ret->parent = op;
6359 		ret = cur = op;
6360 	    } else {
6361 	        cur->c2 = op;
6362 		if (op != NULL)
6363 		    op->parent = cur;
6364 		op->c1 = last;
6365 		if (last != NULL)
6366 		    last->parent = op;
6367 		cur =op;
6368 		last = NULL;
6369 	    }
6370 	} else {
6371 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6372 	    if ((last != NULL) && (last != ret))
6373 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6374 	    if (ret != NULL)
6375 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6376 	    return(NULL);
6377 	}
6378 	GROW;
6379 	SKIP_BLANKS;
6380 	GROW;
6381 	if (RAW == '(') {
6382 	    int inputid = ctxt->input->id;
6383 	    /* Recurse on second child */
6384 	    NEXT;
6385 	    SKIP_BLANKS;
6386 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387                                                           depth + 1);
6388             if (last == NULL) {
6389 		if (ret != NULL)
6390 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6391 		return(NULL);
6392             }
6393 	    SKIP_BLANKS;
6394 	} else {
6395 	    elem = xmlParseName(ctxt);
6396 	    if (elem == NULL) {
6397 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6398 		if (ret != NULL)
6399 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 		return(NULL);
6401 	    }
6402 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6403 	    if (last == NULL) {
6404 		if (ret != NULL)
6405 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 		return(NULL);
6407 	    }
6408 	    if (RAW == '?') {
6409 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6410 		NEXT;
6411 	    } else if (RAW == '*') {
6412 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6413 		NEXT;
6414 	    } else if (RAW == '+') {
6415 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416 		NEXT;
6417 	    } else {
6418 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6419 	    }
6420 	}
6421 	SKIP_BLANKS;
6422 	GROW;
6423     }
6424     if ((cur != NULL) && (last != NULL)) {
6425         cur->c2 = last;
6426 	if (last != NULL)
6427 	    last->parent = cur;
6428     }
6429     if (ctxt->input->id != inputchk) {
6430 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431                        "Element content declaration doesn't start and stop in"
6432                        " the same entity\n");
6433     }
6434     NEXT;
6435     if (RAW == '?') {
6436 	if (ret != NULL) {
6437 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440 	    else
6441 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6442 	}
6443 	NEXT;
6444     } else if (RAW == '*') {
6445 	if (ret != NULL) {
6446 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6447 	    cur = ret;
6448 	    /*
6449 	     * Some normalization:
6450 	     * (a | b* | c?)* == (a | b | c)*
6451 	     */
6452 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6453 		if ((cur->c1 != NULL) &&
6454 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 		if ((cur->c2 != NULL) &&
6458 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 		cur = cur->c2;
6462 	    }
6463 	}
6464 	NEXT;
6465     } else if (RAW == '+') {
6466 	if (ret != NULL) {
6467 	    int found = 0;
6468 
6469 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6472 	    else
6473 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6474 	    /*
6475 	     * Some normalization:
6476 	     * (a | b*)+ == (a | b)*
6477 	     * (a | b?)+ == (a | b)*
6478 	     */
6479 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6480 		if ((cur->c1 != NULL) &&
6481 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 		    found = 1;
6485 		}
6486 		if ((cur->c2 != NULL) &&
6487 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 		    found = 1;
6491 		}
6492 		cur = cur->c2;
6493 	    }
6494 	    if (found)
6495 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6496 	}
6497 	NEXT;
6498     }
6499     return(ret);
6500 }
6501 
6502 /**
6503  * xmlParseElementChildrenContentDecl:
6504  * @ctxt:  an XML parser context
6505  * @inputchk:  the input used for the current entity, needed for boundary checks
6506  *
6507  * parse the declaration for a Mixed Element content
6508  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6509  *
6510  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6511  *
6512  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6513  *
6514  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6515  *
6516  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6517  *
6518  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519  * TODO Parameter-entity replacement text must be properly nested
6520  *	with parenthesized groups. That is to say, if either of the
6521  *	opening or closing parentheses in a choice, seq, or Mixed
6522  *	construct is contained in the replacement text for a parameter
6523  *	entity, both must be contained in the same replacement text. For
6524  *	interoperability, if a parameter-entity reference appears in a
6525  *	choice, seq, or Mixed construct, its replacement text should not
6526  *	be empty, and neither the first nor last non-blank character of
6527  *	the replacement text should be a connector (| or ,).
6528  *
6529  * Returns the tree of xmlElementContentPtr describing the element
6530  *          hierarchy.
6531  */
6532 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6533 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534     /* stub left for API/ABI compat */
6535     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6536 }
6537 
6538 /**
6539  * xmlParseElementContentDecl:
6540  * @ctxt:  an XML parser context
6541  * @name:  the name of the element being defined.
6542  * @result:  the Element Content pointer will be stored here if any
6543  *
6544  * parse the declaration for an Element content either Mixed or Children,
6545  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6546  *
6547  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6548  *
6549  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6550  */
6551 
6552 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6553 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6554                            xmlElementContentPtr *result) {
6555 
6556     xmlElementContentPtr tree = NULL;
6557     int inputid = ctxt->input->id;
6558     int res;
6559 
6560     *result = NULL;
6561 
6562     if (RAW != '(') {
6563 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6564 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6565 	return(-1);
6566     }
6567     NEXT;
6568     GROW;
6569     if (ctxt->instate == XML_PARSER_EOF)
6570         return(-1);
6571     SKIP_BLANKS;
6572     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6574 	res = XML_ELEMENT_TYPE_MIXED;
6575     } else {
6576         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6577 	res = XML_ELEMENT_TYPE_ELEMENT;
6578     }
6579     SKIP_BLANKS;
6580     *result = tree;
6581     return(res);
6582 }
6583 
6584 /**
6585  * xmlParseElementDecl:
6586  * @ctxt:  an XML parser context
6587  *
6588  * parse an Element declaration.
6589  *
6590  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6591  *
6592  * [ VC: Unique Element Type Declaration ]
6593  * No element type may be declared more than once
6594  *
6595  * Returns the type of the element, or -1 in case of error
6596  */
6597 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6598 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6599     const xmlChar *name;
6600     int ret = -1;
6601     xmlElementContentPtr content  = NULL;
6602 
6603     /* GROW; done in the caller */
6604     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605 	int inputid = ctxt->input->id;
6606 
6607 	SKIP(9);
6608 	if (SKIP_BLANKS == 0) {
6609 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610 		           "Space required after 'ELEMENT'\n");
6611 	    return(-1);
6612 	}
6613         name = xmlParseName(ctxt);
6614 	if (name == NULL) {
6615 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616 			   "xmlParseElementDecl: no name for Element\n");
6617 	    return(-1);
6618 	}
6619 	if (SKIP_BLANKS == 0) {
6620 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6621 			   "Space required after the element name\n");
6622 	}
6623 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6624 	    SKIP(5);
6625 	    /*
6626 	     * Element must always be empty.
6627 	     */
6628 	    ret = XML_ELEMENT_TYPE_EMPTY;
6629 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6630 	           (NXT(2) == 'Y')) {
6631 	    SKIP(3);
6632 	    /*
6633 	     * Element is a generic container.
6634 	     */
6635 	    ret = XML_ELEMENT_TYPE_ANY;
6636 	} else if (RAW == '(') {
6637 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6638 	} else {
6639 	    /*
6640 	     * [ WFC: PEs in Internal Subset ] error handling.
6641 	     */
6642 	    if ((RAW == '%') && (ctxt->external == 0) &&
6643 	        (ctxt->inputNr == 1)) {
6644 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6645 	  "PEReference: forbidden within markup decl in internal subset\n");
6646 	    } else {
6647 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6648 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6649             }
6650 	    return(-1);
6651 	}
6652 
6653 	SKIP_BLANKS;
6654 
6655 	if (RAW != '>') {
6656 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6657 	    if (content != NULL) {
6658 		xmlFreeDocElementContent(ctxt->myDoc, content);
6659 	    }
6660 	} else {
6661 	    if (inputid != ctxt->input->id) {
6662 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6663                                "Element declaration doesn't start and stop in"
6664                                " the same entity\n");
6665 	    }
6666 
6667 	    NEXT;
6668 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6669 		(ctxt->sax->elementDecl != NULL)) {
6670 		if (content != NULL)
6671 		    content->parent = NULL;
6672 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6673 		                       content);
6674 		if ((content != NULL) && (content->parent == NULL)) {
6675 		    /*
6676 		     * this is a trick: if xmlAddElementDecl is called,
6677 		     * instead of copying the full tree it is plugged directly
6678 		     * if called from the parser. Avoid duplicating the
6679 		     * interfaces or change the API/ABI
6680 		     */
6681 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6682 		}
6683 	    } else if (content != NULL) {
6684 		xmlFreeDocElementContent(ctxt->myDoc, content);
6685 	    }
6686 	}
6687     }
6688     return(ret);
6689 }
6690 
6691 /**
6692  * xmlParseConditionalSections
6693  * @ctxt:  an XML parser context
6694  *
6695  * [61] conditionalSect ::= includeSect | ignoreSect
6696  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6697  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6700  */
6701 
6702 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6703 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6704     int *inputIds = NULL;
6705     size_t inputIdsSize = 0;
6706     size_t depth = 0;
6707 
6708     while (ctxt->instate != XML_PARSER_EOF) {
6709         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710             int id = ctxt->input->id;
6711 
6712             SKIP(3);
6713             SKIP_BLANKS;
6714 
6715             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716                 SKIP(7);
6717                 SKIP_BLANKS;
6718                 if (RAW != '[') {
6719                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6720                     xmlHaltParser(ctxt);
6721                     goto error;
6722                 }
6723                 if (ctxt->input->id != id) {
6724                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6725                                    "All markup of the conditional section is"
6726                                    " not in the same entity\n");
6727                 }
6728                 NEXT;
6729 
6730                 if (inputIdsSize <= depth) {
6731                     int *tmp;
6732 
6733                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6734                     tmp = (int *) xmlRealloc(inputIds,
6735                             inputIdsSize * sizeof(int));
6736                     if (tmp == NULL) {
6737                         xmlErrMemory(ctxt, NULL);
6738                         goto error;
6739                     }
6740                     inputIds = tmp;
6741                 }
6742                 inputIds[depth] = id;
6743                 depth++;
6744             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6745                 int state;
6746                 xmlParserInputState instate;
6747                 size_t ignoreDepth = 0;
6748 
6749                 SKIP(6);
6750                 SKIP_BLANKS;
6751                 if (RAW != '[') {
6752                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6753                     xmlHaltParser(ctxt);
6754                     goto error;
6755                 }
6756                 if (ctxt->input->id != id) {
6757                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6758                                    "All markup of the conditional section is"
6759                                    " not in the same entity\n");
6760                 }
6761                 NEXT;
6762 
6763                 /*
6764                  * Parse up to the end of the conditional section but disable
6765                  * SAX event generating DTD building in the meantime
6766                  */
6767                 state = ctxt->disableSAX;
6768                 instate = ctxt->instate;
6769                 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6770                 ctxt->instate = XML_PARSER_IGNORE;
6771 
6772                 while (RAW != 0) {
6773                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6774                         SKIP(3);
6775                         ignoreDepth++;
6776                         /* Check for integer overflow */
6777                         if (ignoreDepth == 0) {
6778                             xmlErrMemory(ctxt, NULL);
6779                             goto error;
6780                         }
6781                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6782                                (NXT(2) == '>')) {
6783                         if (ignoreDepth == 0)
6784                             break;
6785                         SKIP(3);
6786                         ignoreDepth--;
6787                     } else {
6788                         NEXT;
6789                     }
6790                 }
6791 
6792                 ctxt->disableSAX = state;
6793                 ctxt->instate = instate;
6794 
6795 		if (RAW == 0) {
6796 		    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6797                     goto error;
6798 		}
6799                 if (ctxt->input->id != id) {
6800                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801                                    "All markup of the conditional section is"
6802                                    " not in the same entity\n");
6803                 }
6804                 SKIP(3);
6805             } else {
6806                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6807                 xmlHaltParser(ctxt);
6808                 goto error;
6809             }
6810         } else if ((depth > 0) &&
6811                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6812             depth--;
6813             if (ctxt->input->id != inputIds[depth]) {
6814                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815                                "All markup of the conditional section is not"
6816                                " in the same entity\n");
6817             }
6818             SKIP(3);
6819         } else {
6820             int id = ctxt->input->id;
6821             unsigned long cons = CUR_CONSUMED;
6822 
6823             xmlParseMarkupDecl(ctxt);
6824 
6825             if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6826                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6827                 xmlHaltParser(ctxt);
6828                 goto error;
6829             }
6830         }
6831 
6832         if (depth == 0)
6833             break;
6834 
6835         SKIP_BLANKS;
6836         GROW;
6837     }
6838 
6839 error:
6840     xmlFree(inputIds);
6841 }
6842 
6843 /**
6844  * xmlParseMarkupDecl:
6845  * @ctxt:  an XML parser context
6846  *
6847  * parse Markup declarations
6848  *
6849  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850  *                     NotationDecl | PI | Comment
6851  *
6852  * [ VC: Proper Declaration/PE Nesting ]
6853  * Parameter-entity replacement text must be properly nested with
6854  * markup declarations. That is to say, if either the first character
6855  * or the last character of a markup declaration (markupdecl above) is
6856  * contained in the replacement text for a parameter-entity reference,
6857  * both must be contained in the same replacement text.
6858  *
6859  * [ WFC: PEs in Internal Subset ]
6860  * In the internal DTD subset, parameter-entity references can occur
6861  * only where markup declarations can occur, not within markup declarations.
6862  * (This does not apply to references that occur in external parameter
6863  * entities or to the external subset.)
6864  */
6865 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6866 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6867     GROW;
6868     if (CUR == '<') {
6869         if (NXT(1) == '!') {
6870 	    switch (NXT(2)) {
6871 	        case 'E':
6872 		    if (NXT(3) == 'L')
6873 			xmlParseElementDecl(ctxt);
6874 		    else if (NXT(3) == 'N')
6875 			xmlParseEntityDecl(ctxt);
6876 		    break;
6877 	        case 'A':
6878 		    xmlParseAttributeListDecl(ctxt);
6879 		    break;
6880 	        case 'N':
6881 		    xmlParseNotationDecl(ctxt);
6882 		    break;
6883 	        case '-':
6884 		    xmlParseComment(ctxt);
6885 		    break;
6886 		default:
6887 		    /* there is an error but it will be detected later */
6888 		    break;
6889 	    }
6890 	} else if (NXT(1) == '?') {
6891 	    xmlParsePI(ctxt);
6892 	}
6893     }
6894 
6895     /*
6896      * detect requirement to exit there and act accordingly
6897      * and avoid having instate overridden later on
6898      */
6899     if (ctxt->instate == XML_PARSER_EOF)
6900         return;
6901 
6902     ctxt->instate = XML_PARSER_DTD;
6903 }
6904 
6905 /**
6906  * xmlParseTextDecl:
6907  * @ctxt:  an XML parser context
6908  *
6909  * parse an XML declaration header for external entities
6910  *
6911  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6912  */
6913 
6914 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6915 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6916     xmlChar *version;
6917     const xmlChar *encoding;
6918     int oldstate;
6919 
6920     /*
6921      * We know that '<?xml' is here.
6922      */
6923     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6924 	SKIP(5);
6925     } else {
6926 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6927 	return;
6928     }
6929 
6930     /* Avoid expansion of parameter entities when skipping blanks. */
6931     oldstate = ctxt->instate;
6932     ctxt->instate = XML_PARSER_START;
6933 
6934     if (SKIP_BLANKS == 0) {
6935 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6936 		       "Space needed after '<?xml'\n");
6937     }
6938 
6939     /*
6940      * We may have the VersionInfo here.
6941      */
6942     version = xmlParseVersionInfo(ctxt);
6943     if (version == NULL)
6944 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6945     else {
6946 	if (SKIP_BLANKS == 0) {
6947 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948 		           "Space needed here\n");
6949 	}
6950     }
6951     ctxt->input->version = version;
6952 
6953     /*
6954      * We must have the encoding declaration
6955      */
6956     encoding = xmlParseEncodingDecl(ctxt);
6957     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6958 	/*
6959 	 * The XML REC instructs us to stop parsing right here
6960 	 */
6961         ctxt->instate = oldstate;
6962         return;
6963     }
6964     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6965 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6966 		       "Missing encoding in text declaration\n");
6967     }
6968 
6969     SKIP_BLANKS;
6970     if ((RAW == '?') && (NXT(1) == '>')) {
6971         SKIP(2);
6972     } else if (RAW == '>') {
6973         /* Deprecated old WD ... */
6974 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6975 	NEXT;
6976     } else {
6977 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6978 	MOVETO_ENDTAG(CUR_PTR);
6979 	NEXT;
6980     }
6981 
6982     ctxt->instate = oldstate;
6983 }
6984 
6985 /**
6986  * xmlParseExternalSubset:
6987  * @ctxt:  an XML parser context
6988  * @ExternalID: the external identifier
6989  * @SystemID: the system identifier (or URL)
6990  *
6991  * parse Markup declarations from an external subset
6992  *
6993  * [30] extSubset ::= textDecl? extSubsetDecl
6994  *
6995  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6996  */
6997 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6998 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6999                        const xmlChar *SystemID) {
7000     xmlDetectSAX2(ctxt);
7001     GROW;
7002 
7003     if ((ctxt->encoding == NULL) &&
7004         (ctxt->input->end - ctxt->input->cur >= 4)) {
7005         xmlChar start[4];
7006 	xmlCharEncoding enc;
7007 
7008 	start[0] = RAW;
7009 	start[1] = NXT(1);
7010 	start[2] = NXT(2);
7011 	start[3] = NXT(3);
7012 	enc = xmlDetectCharEncoding(start, 4);
7013 	if (enc != XML_CHAR_ENCODING_NONE)
7014 	    xmlSwitchEncoding(ctxt, enc);
7015     }
7016 
7017     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7018 	xmlParseTextDecl(ctxt);
7019 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7020 	    /*
7021 	     * The XML REC instructs us to stop parsing right here
7022 	     */
7023 	    xmlHaltParser(ctxt);
7024 	    return;
7025 	}
7026     }
7027     if (ctxt->myDoc == NULL) {
7028         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7029 	if (ctxt->myDoc == NULL) {
7030 	    xmlErrMemory(ctxt, "New Doc failed");
7031 	    return;
7032 	}
7033 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7034     }
7035     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7036         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7037 
7038     ctxt->instate = XML_PARSER_DTD;
7039     ctxt->external = 1;
7040     SKIP_BLANKS;
7041     while (((RAW == '<') && (NXT(1) == '?')) ||
7042            ((RAW == '<') && (NXT(1) == '!')) ||
7043 	   (RAW == '%')) {
7044 	int id = ctxt->input->id;
7045 	unsigned long cons = CUR_CONSUMED;
7046 
7047 	GROW;
7048         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049 	    xmlParseConditionalSections(ctxt);
7050 	} else
7051 	    xmlParseMarkupDecl(ctxt);
7052         SKIP_BLANKS;
7053 
7054 	if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7055 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056 	    break;
7057 	}
7058     }
7059 
7060     if (RAW != 0) {
7061 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7062     }
7063 
7064 }
7065 
7066 /**
7067  * xmlParseReference:
7068  * @ctxt:  an XML parser context
7069  *
7070  * parse and handle entity references in content, depending on the SAX
7071  * interface, this may end-up in a call to character() if this is a
7072  * CharRef, a predefined entity, if there is no reference() callback.
7073  * or if the parser was asked to switch to that mode.
7074  *
7075  * [67] Reference ::= EntityRef | CharRef
7076  */
7077 void
xmlParseReference(xmlParserCtxtPtr ctxt)7078 xmlParseReference(xmlParserCtxtPtr ctxt) {
7079     xmlEntityPtr ent;
7080     xmlChar *val;
7081     int was_checked;
7082     xmlNodePtr list = NULL;
7083     xmlParserErrors ret = XML_ERR_OK;
7084 
7085 
7086     if (RAW != '&')
7087         return;
7088 
7089     /*
7090      * Simple case of a CharRef
7091      */
7092     if (NXT(1) == '#') {
7093 	int i = 0;
7094 	xmlChar out[16];
7095 	int hex = NXT(2);
7096 	int value = xmlParseCharRef(ctxt);
7097 
7098 	if (value == 0)
7099 	    return;
7100 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7101 	    /*
7102 	     * So we are using non-UTF-8 buffers
7103 	     * Check that the char fit on 8bits, if not
7104 	     * generate a CharRef.
7105 	     */
7106 	    if (value <= 0xFF) {
7107 		out[0] = value;
7108 		out[1] = 0;
7109 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7110 		    (!ctxt->disableSAX))
7111 		    ctxt->sax->characters(ctxt->userData, out, 1);
7112 	    } else {
7113 		if ((hex == 'x') || (hex == 'X'))
7114 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7115 		else
7116 		    snprintf((char *)out, sizeof(out), "#%d", value);
7117 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7118 		    (!ctxt->disableSAX))
7119 		    ctxt->sax->reference(ctxt->userData, out);
7120 	    }
7121 	} else {
7122 	    /*
7123 	     * Just encode the value in UTF-8
7124 	     */
7125 	    COPY_BUF(0 ,out, i, value);
7126 	    out[i] = 0;
7127 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7128 		(!ctxt->disableSAX))
7129 		ctxt->sax->characters(ctxt->userData, out, i);
7130 	}
7131 	return;
7132     }
7133 
7134     /*
7135      * We are seeing an entity reference
7136      */
7137     ent = xmlParseEntityRef(ctxt);
7138     if (ent == NULL) return;
7139     if (!ctxt->wellFormed)
7140 	return;
7141     was_checked = ent->checked;
7142 
7143     /* special case of predefined entities */
7144     if ((ent->name == NULL) ||
7145         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7146 	val = ent->content;
7147 	if (val == NULL) return;
7148 	/*
7149 	 * inline the entity.
7150 	 */
7151 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 	    (!ctxt->disableSAX))
7153 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7154 	return;
7155     }
7156 
7157     /*
7158      * The first reference to the entity trigger a parsing phase
7159      * where the ent->children is filled with the result from
7160      * the parsing.
7161      * Note: external parsed entities will not be loaded, it is not
7162      * required for a non-validating parser, unless the parsing option
7163      * of validating, or substituting entities were given. Doing so is
7164      * far more secure as the parser will only process data coming from
7165      * the document entity by default.
7166      */
7167     if (((ent->checked == 0) ||
7168          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7169         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7170          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7171 	unsigned long oldnbent = ctxt->nbentities, diff;
7172 
7173 	/*
7174 	 * This is a bit hackish but this seems the best
7175 	 * way to make sure both SAX and DOM entity support
7176 	 * behaves okay.
7177 	 */
7178 	void *user_data;
7179 	if (ctxt->userData == ctxt)
7180 	    user_data = NULL;
7181 	else
7182 	    user_data = ctxt->userData;
7183 
7184 	/*
7185 	 * Check that this entity is well formed
7186 	 * 4.3.2: An internal general parsed entity is well-formed
7187 	 * if its replacement text matches the production labeled
7188 	 * content.
7189 	 */
7190 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7191 	    ctxt->depth++;
7192 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7193 	                                              user_data, &list);
7194 	    ctxt->depth--;
7195 
7196 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7197 	    ctxt->depth++;
7198 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7199 	                                   user_data, ctxt->depth, ent->URI,
7200 					   ent->ExternalID, &list);
7201 	    ctxt->depth--;
7202 	} else {
7203 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7204 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7205 			 "invalid entity type found\n", NULL);
7206 	}
7207 
7208 	/*
7209 	 * Store the number of entities needing parsing for this entity
7210 	 * content and do checkings
7211 	 */
7212         diff = ctxt->nbentities - oldnbent + 1;
7213         if (diff > INT_MAX / 2)
7214             diff = INT_MAX / 2;
7215         ent->checked = diff * 2;
7216 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7217 	    ent->checked |= 1;
7218 	if (ret == XML_ERR_ENTITY_LOOP) {
7219 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7220             xmlHaltParser(ctxt);
7221 	    xmlFreeNodeList(list);
7222 	    return;
7223 	}
7224 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7225 	    xmlFreeNodeList(list);
7226 	    return;
7227 	}
7228 
7229 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7230 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7231 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7232 		(ent->children == NULL)) {
7233 		ent->children = list;
7234                 /*
7235                  * Prune it directly in the generated document
7236                  * except for single text nodes.
7237                  */
7238                 if ((ctxt->replaceEntities == 0) ||
7239                     (ctxt->parseMode == XML_PARSE_READER) ||
7240                     ((list->type == XML_TEXT_NODE) &&
7241                      (list->next == NULL))) {
7242                     ent->owner = 1;
7243                     while (list != NULL) {
7244                         list->parent = (xmlNodePtr) ent;
7245                         if (list->doc != ent->doc)
7246                             xmlSetTreeDoc(list, ent->doc);
7247                         if (list->next == NULL)
7248                             ent->last = list;
7249                         list = list->next;
7250                     }
7251                     list = NULL;
7252                 } else {
7253                     ent->owner = 0;
7254                     while (list != NULL) {
7255                         list->parent = (xmlNodePtr) ctxt->node;
7256                         list->doc = ctxt->myDoc;
7257                         if (list->next == NULL)
7258                             ent->last = list;
7259                         list = list->next;
7260                     }
7261                     list = ent->children;
7262 #ifdef LIBXML_LEGACY_ENABLED
7263                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7264                         xmlAddEntityReference(ent, list, NULL);
7265 #endif /* LIBXML_LEGACY_ENABLED */
7266                 }
7267 	    } else {
7268 		xmlFreeNodeList(list);
7269 		list = NULL;
7270 	    }
7271 	} else if ((ret != XML_ERR_OK) &&
7272 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7273 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 		     "Entity '%s' failed to parse\n", ent->name);
7275             if (ent->content != NULL)
7276                 ent->content[0] = 0;
7277 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7278 	} else if (list != NULL) {
7279 	    xmlFreeNodeList(list);
7280 	    list = NULL;
7281 	}
7282 	if (ent->checked == 0)
7283 	    ent->checked = 2;
7284 
7285         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7286         was_checked = 0;
7287     } else if (ent->checked != 1) {
7288 	ctxt->nbentities += ent->checked / 2;
7289     }
7290 
7291     /*
7292      * Now that the entity content has been gathered
7293      * provide it to the application, this can take different forms based
7294      * on the parsing modes.
7295      */
7296     if (ent->children == NULL) {
7297 	/*
7298 	 * Probably running in SAX mode and the callbacks don't
7299 	 * build the entity content. So unless we already went
7300 	 * though parsing for first checking go though the entity
7301 	 * content to generate callbacks associated to the entity
7302 	 */
7303 	if (was_checked != 0) {
7304 	    void *user_data;
7305 	    /*
7306 	     * This is a bit hackish but this seems the best
7307 	     * way to make sure both SAX and DOM entity support
7308 	     * behaves okay.
7309 	     */
7310 	    if (ctxt->userData == ctxt)
7311 		user_data = NULL;
7312 	    else
7313 		user_data = ctxt->userData;
7314 
7315 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7316 		ctxt->depth++;
7317 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7318 				   ent->content, user_data, NULL);
7319 		ctxt->depth--;
7320 	    } else if (ent->etype ==
7321 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7322 		ctxt->depth++;
7323 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7324 			   ctxt->sax, user_data, ctxt->depth,
7325 			   ent->URI, ent->ExternalID, NULL);
7326 		ctxt->depth--;
7327 	    } else {
7328 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7329 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7330 			     "invalid entity type found\n", NULL);
7331 	    }
7332 	    if (ret == XML_ERR_ENTITY_LOOP) {
7333 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7334 		return;
7335 	    }
7336 	}
7337 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7338 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7339 	    /*
7340 	     * Entity reference callback comes second, it's somewhat
7341 	     * superfluous but a compatibility to historical behaviour
7342 	     */
7343 	    ctxt->sax->reference(ctxt->userData, ent->name);
7344 	}
7345 	return;
7346     }
7347 
7348     /*
7349      * If we didn't get any children for the entity being built
7350      */
7351     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7352 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7353 	/*
7354 	 * Create a node.
7355 	 */
7356 	ctxt->sax->reference(ctxt->userData, ent->name);
7357 	return;
7358     }
7359 
7360     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7361 	/*
7362 	 * There is a problem on the handling of _private for entities
7363 	 * (bug 155816): Should we copy the content of the field from
7364 	 * the entity (possibly overwriting some value set by the user
7365 	 * when a copy is created), should we leave it alone, or should
7366 	 * we try to take care of different situations?  The problem
7367 	 * is exacerbated by the usage of this field by the xmlReader.
7368 	 * To fix this bug, we look at _private on the created node
7369 	 * and, if it's NULL, we copy in whatever was in the entity.
7370 	 * If it's not NULL we leave it alone.  This is somewhat of a
7371 	 * hack - maybe we should have further tests to determine
7372 	 * what to do.
7373 	 */
7374 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7375 	    /*
7376 	     * Seems we are generating the DOM content, do
7377 	     * a simple tree copy for all references except the first
7378 	     * In the first occurrence list contains the replacement.
7379 	     */
7380 	    if (((list == NULL) && (ent->owner == 0)) ||
7381 		(ctxt->parseMode == XML_PARSE_READER)) {
7382 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7383 
7384 		/*
7385 		 * We are copying here, make sure there is no abuse
7386 		 */
7387 		ctxt->sizeentcopy += ent->length + 5;
7388 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7389 		    return;
7390 
7391 		/*
7392 		 * when operating on a reader, the entities definitions
7393 		 * are always owning the entities subtree.
7394 		if (ctxt->parseMode == XML_PARSE_READER)
7395 		    ent->owner = 1;
7396 		 */
7397 
7398 		cur = ent->children;
7399 		while (cur != NULL) {
7400 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7401 		    if (nw != NULL) {
7402 			if (nw->_private == NULL)
7403 			    nw->_private = cur->_private;
7404 			if (firstChild == NULL){
7405 			    firstChild = nw;
7406 			}
7407 			nw = xmlAddChild(ctxt->node, nw);
7408 		    }
7409 		    if (cur == ent->last) {
7410 			/*
7411 			 * needed to detect some strange empty
7412 			 * node cases in the reader tests
7413 			 */
7414 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7415 			    (nw != NULL) &&
7416 			    (nw->type == XML_ELEMENT_NODE) &&
7417 			    (nw->children == NULL))
7418 			    nw->extra = 1;
7419 
7420 			break;
7421 		    }
7422 		    cur = cur->next;
7423 		}
7424 #ifdef LIBXML_LEGACY_ENABLED
7425 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7426 		  xmlAddEntityReference(ent, firstChild, nw);
7427 #endif /* LIBXML_LEGACY_ENABLED */
7428 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7429 		xmlNodePtr nw = NULL, cur, next, last,
7430 			   firstChild = NULL;
7431 
7432 		/*
7433 		 * We are copying here, make sure there is no abuse
7434 		 */
7435 		ctxt->sizeentcopy += ent->length + 5;
7436 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7437 		    return;
7438 
7439 		/*
7440 		 * Copy the entity child list and make it the new
7441 		 * entity child list. The goal is to make sure any
7442 		 * ID or REF referenced will be the one from the
7443 		 * document content and not the entity copy.
7444 		 */
7445 		cur = ent->children;
7446 		ent->children = NULL;
7447 		last = ent->last;
7448 		ent->last = NULL;
7449 		while (cur != NULL) {
7450 		    next = cur->next;
7451 		    cur->next = NULL;
7452 		    cur->parent = NULL;
7453 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7454 		    if (nw != NULL) {
7455 			if (nw->_private == NULL)
7456 			    nw->_private = cur->_private;
7457 			if (firstChild == NULL){
7458 			    firstChild = cur;
7459 			}
7460 			xmlAddChild((xmlNodePtr) ent, nw);
7461 			xmlAddChild(ctxt->node, cur);
7462 		    }
7463 		    if (cur == last)
7464 			break;
7465 		    cur = next;
7466 		}
7467 		if (ent->owner == 0)
7468 		    ent->owner = 1;
7469 #ifdef LIBXML_LEGACY_ENABLED
7470 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7471 		  xmlAddEntityReference(ent, firstChild, nw);
7472 #endif /* LIBXML_LEGACY_ENABLED */
7473 	    } else {
7474 		const xmlChar *nbktext;
7475 
7476 		/*
7477 		 * the name change is to avoid coalescing of the
7478 		 * node with a possible previous text one which
7479 		 * would make ent->children a dangling pointer
7480 		 */
7481 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7482 					-1);
7483 		if (ent->children->type == XML_TEXT_NODE)
7484 		    ent->children->name = nbktext;
7485 		if ((ent->last != ent->children) &&
7486 		    (ent->last->type == XML_TEXT_NODE))
7487 		    ent->last->name = nbktext;
7488 		xmlAddChildList(ctxt->node, ent->children);
7489 	    }
7490 
7491 	    /*
7492 	     * This is to avoid a nasty side effect, see
7493 	     * characters() in SAX.c
7494 	     */
7495 	    ctxt->nodemem = 0;
7496 	    ctxt->nodelen = 0;
7497 	    return;
7498 	}
7499     }
7500 }
7501 
7502 /**
7503  * xmlParseEntityRef:
7504  * @ctxt:  an XML parser context
7505  *
7506  * parse ENTITY references declarations
7507  *
7508  * [68] EntityRef ::= '&' Name ';'
7509  *
7510  * [ WFC: Entity Declared ]
7511  * In a document without any DTD, a document with only an internal DTD
7512  * subset which contains no parameter entity references, or a document
7513  * with "standalone='yes'", the Name given in the entity reference
7514  * must match that in an entity declaration, except that well-formed
7515  * documents need not declare any of the following entities: amp, lt,
7516  * gt, apos, quot.  The declaration of a parameter entity must precede
7517  * any reference to it.  Similarly, the declaration of a general entity
7518  * must precede any reference to it which appears in a default value in an
7519  * attribute-list declaration. Note that if entities are declared in the
7520  * external subset or in external parameter entities, a non-validating
7521  * processor is not obligated to read and process their declarations;
7522  * for such documents, the rule that an entity must be declared is a
7523  * well-formedness constraint only if standalone='yes'.
7524  *
7525  * [ WFC: Parsed Entity ]
7526  * An entity reference must not contain the name of an unparsed entity
7527  *
7528  * Returns the xmlEntityPtr if found, or NULL otherwise.
7529  */
7530 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7531 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7532     const xmlChar *name;
7533     xmlEntityPtr ent = NULL;
7534 
7535     GROW;
7536     if (ctxt->instate == XML_PARSER_EOF)
7537         return(NULL);
7538 
7539     if (RAW != '&')
7540         return(NULL);
7541     NEXT;
7542     name = xmlParseName(ctxt);
7543     if (name == NULL) {
7544 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7545 		       "xmlParseEntityRef: no name\n");
7546         return(NULL);
7547     }
7548     if (RAW != ';') {
7549 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7550 	return(NULL);
7551     }
7552     NEXT;
7553 
7554     /*
7555      * Predefined entities override any extra definition
7556      */
7557     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558         ent = xmlGetPredefinedEntity(name);
7559         if (ent != NULL)
7560             return(ent);
7561     }
7562 
7563     /*
7564      * Increase the number of entity references parsed
7565      */
7566     ctxt->nbentities++;
7567 
7568     /*
7569      * Ask first SAX for entity resolution, otherwise try the
7570      * entities which may have stored in the parser context.
7571      */
7572     if (ctxt->sax != NULL) {
7573 	if (ctxt->sax->getEntity != NULL)
7574 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7575 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7576 	    (ctxt->options & XML_PARSE_OLDSAX))
7577 	    ent = xmlGetPredefinedEntity(name);
7578 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7579 	    (ctxt->userData==ctxt)) {
7580 	    ent = xmlSAX2GetEntity(ctxt, name);
7581 	}
7582     }
7583     if (ctxt->instate == XML_PARSER_EOF)
7584 	return(NULL);
7585     /*
7586      * [ WFC: Entity Declared ]
7587      * In a document without any DTD, a document with only an
7588      * internal DTD subset which contains no parameter entity
7589      * references, or a document with "standalone='yes'", the
7590      * Name given in the entity reference must match that in an
7591      * entity declaration, except that well-formed documents
7592      * need not declare any of the following entities: amp, lt,
7593      * gt, apos, quot.
7594      * The declaration of a parameter entity must precede any
7595      * reference to it.
7596      * Similarly, the declaration of a general entity must
7597      * precede any reference to it which appears in a default
7598      * value in an attribute-list declaration. Note that if
7599      * entities are declared in the external subset or in
7600      * external parameter entities, a non-validating processor
7601      * is not obligated to read and process their declarations;
7602      * for such documents, the rule that an entity must be
7603      * declared is a well-formedness constraint only if
7604      * standalone='yes'.
7605      */
7606     if (ent == NULL) {
7607 	if ((ctxt->standalone == 1) ||
7608 	    ((ctxt->hasExternalSubset == 0) &&
7609 	     (ctxt->hasPErefs == 0))) {
7610 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7611 		     "Entity '%s' not defined\n", name);
7612 	} else {
7613 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7614 		     "Entity '%s' not defined\n", name);
7615 	    if ((ctxt->inSubset == 0) &&
7616 		(ctxt->sax != NULL) &&
7617 		(ctxt->sax->reference != NULL)) {
7618 		ctxt->sax->reference(ctxt->userData, name);
7619 	    }
7620 	}
7621 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7622 	ctxt->valid = 0;
7623     }
7624 
7625     /*
7626      * [ WFC: Parsed Entity ]
7627      * An entity reference must not contain the name of an
7628      * unparsed entity
7629      */
7630     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7631 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7632 		 "Entity reference to unparsed entity %s\n", name);
7633     }
7634 
7635     /*
7636      * [ WFC: No External Entity References ]
7637      * Attribute values cannot contain direct or indirect
7638      * entity references to external entities.
7639      */
7640     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7641 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7642 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7643 	     "Attribute references external entity '%s'\n", name);
7644     }
7645     /*
7646      * [ WFC: No < in Attribute Values ]
7647      * The replacement text of any entity referred to directly or
7648      * indirectly in an attribute value (other than "&lt;") must
7649      * not contain a <.
7650      */
7651     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652 	     (ent != NULL) &&
7653 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7654 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7655 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7656 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7657 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7658         }
7659     }
7660 
7661     /*
7662      * Internal check, no parameter entities here ...
7663      */
7664     else {
7665 	switch (ent->etype) {
7666 	    case XML_INTERNAL_PARAMETER_ENTITY:
7667 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7668 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7669 	     "Attempt to reference the parameter entity '%s'\n",
7670 			      name);
7671 	    break;
7672 	    default:
7673 	    break;
7674 	}
7675     }
7676 
7677     /*
7678      * [ WFC: No Recursion ]
7679      * A parsed entity must not contain a recursive reference
7680      * to itself, either directly or indirectly.
7681      * Done somewhere else
7682      */
7683     return(ent);
7684 }
7685 
7686 /**
7687  * xmlParseStringEntityRef:
7688  * @ctxt:  an XML parser context
7689  * @str:  a pointer to an index in the string
7690  *
7691  * parse ENTITY references declarations, but this version parses it from
7692  * a string value.
7693  *
7694  * [68] EntityRef ::= '&' Name ';'
7695  *
7696  * [ WFC: Entity Declared ]
7697  * In a document without any DTD, a document with only an internal DTD
7698  * subset which contains no parameter entity references, or a document
7699  * with "standalone='yes'", the Name given in the entity reference
7700  * must match that in an entity declaration, except that well-formed
7701  * documents need not declare any of the following entities: amp, lt,
7702  * gt, apos, quot.  The declaration of a parameter entity must precede
7703  * any reference to it.  Similarly, the declaration of a general entity
7704  * must precede any reference to it which appears in a default value in an
7705  * attribute-list declaration. Note that if entities are declared in the
7706  * external subset or in external parameter entities, a non-validating
7707  * processor is not obligated to read and process their declarations;
7708  * for such documents, the rule that an entity must be declared is a
7709  * well-formedness constraint only if standalone='yes'.
7710  *
7711  * [ WFC: Parsed Entity ]
7712  * An entity reference must not contain the name of an unparsed entity
7713  *
7714  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715  * is updated to the current location in the string.
7716  */
7717 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7718 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7719     xmlChar *name;
7720     const xmlChar *ptr;
7721     xmlChar cur;
7722     xmlEntityPtr ent = NULL;
7723 
7724     if ((str == NULL) || (*str == NULL))
7725         return(NULL);
7726     ptr = *str;
7727     cur = *ptr;
7728     if (cur != '&')
7729 	return(NULL);
7730 
7731     ptr++;
7732     name = xmlParseStringName(ctxt, &ptr);
7733     if (name == NULL) {
7734 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7735 		       "xmlParseStringEntityRef: no name\n");
7736 	*str = ptr;
7737 	return(NULL);
7738     }
7739     if (*ptr != ';') {
7740 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7741         xmlFree(name);
7742 	*str = ptr;
7743 	return(NULL);
7744     }
7745     ptr++;
7746 
7747 
7748     /*
7749      * Predefined entities override any extra definition
7750      */
7751     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7752         ent = xmlGetPredefinedEntity(name);
7753         if (ent != NULL) {
7754             xmlFree(name);
7755             *str = ptr;
7756             return(ent);
7757         }
7758     }
7759 
7760     /*
7761      * Increase the number of entity references parsed
7762      */
7763     ctxt->nbentities++;
7764 
7765     /*
7766      * Ask first SAX for entity resolution, otherwise try the
7767      * entities which may have stored in the parser context.
7768      */
7769     if (ctxt->sax != NULL) {
7770 	if (ctxt->sax->getEntity != NULL)
7771 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7772 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7773 	    ent = xmlGetPredefinedEntity(name);
7774 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7775 	    ent = xmlSAX2GetEntity(ctxt, name);
7776 	}
7777     }
7778     if (ctxt->instate == XML_PARSER_EOF) {
7779 	xmlFree(name);
7780 	return(NULL);
7781     }
7782 
7783     /*
7784      * [ WFC: Entity Declared ]
7785      * In a document without any DTD, a document with only an
7786      * internal DTD subset which contains no parameter entity
7787      * references, or a document with "standalone='yes'", the
7788      * Name given in the entity reference must match that in an
7789      * entity declaration, except that well-formed documents
7790      * need not declare any of the following entities: amp, lt,
7791      * gt, apos, quot.
7792      * The declaration of a parameter entity must precede any
7793      * reference to it.
7794      * Similarly, the declaration of a general entity must
7795      * precede any reference to it which appears in a default
7796      * value in an attribute-list declaration. Note that if
7797      * entities are declared in the external subset or in
7798      * external parameter entities, a non-validating processor
7799      * is not obligated to read and process their declarations;
7800      * for such documents, the rule that an entity must be
7801      * declared is a well-formedness constraint only if
7802      * standalone='yes'.
7803      */
7804     if (ent == NULL) {
7805 	if ((ctxt->standalone == 1) ||
7806 	    ((ctxt->hasExternalSubset == 0) &&
7807 	     (ctxt->hasPErefs == 0))) {
7808 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7809 		     "Entity '%s' not defined\n", name);
7810 	} else {
7811 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7812 			  "Entity '%s' not defined\n",
7813 			  name);
7814 	}
7815 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7816 	/* TODO ? check regressions ctxt->valid = 0; */
7817     }
7818 
7819     /*
7820      * [ WFC: Parsed Entity ]
7821      * An entity reference must not contain the name of an
7822      * unparsed entity
7823      */
7824     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7825 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7826 		 "Entity reference to unparsed entity %s\n", name);
7827     }
7828 
7829     /*
7830      * [ WFC: No External Entity References ]
7831      * Attribute values cannot contain direct or indirect
7832      * entity references to external entities.
7833      */
7834     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7835 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7836 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7837 	 "Attribute references external entity '%s'\n", name);
7838     }
7839     /*
7840      * [ WFC: No < in Attribute Values ]
7841      * The replacement text of any entity referred to directly or
7842      * indirectly in an attribute value (other than "&lt;") must
7843      * not contain a <.
7844      */
7845     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7846 	     (ent != NULL) && (ent->content != NULL) &&
7847 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7848 	     (xmlStrchr(ent->content, '<'))) {
7849 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7850      "'<' in entity '%s' is not allowed in attributes values\n",
7851 			  name);
7852     }
7853 
7854     /*
7855      * Internal check, no parameter entities here ...
7856      */
7857     else {
7858 	switch (ent->etype) {
7859 	    case XML_INTERNAL_PARAMETER_ENTITY:
7860 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7861 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7862 	     "Attempt to reference the parameter entity '%s'\n",
7863 				  name);
7864 	    break;
7865 	    default:
7866 	    break;
7867 	}
7868     }
7869 
7870     /*
7871      * [ WFC: No Recursion ]
7872      * A parsed entity must not contain a recursive reference
7873      * to itself, either directly or indirectly.
7874      * Done somewhere else
7875      */
7876 
7877     xmlFree(name);
7878     *str = ptr;
7879     return(ent);
7880 }
7881 
7882 /**
7883  * xmlParsePEReference:
7884  * @ctxt:  an XML parser context
7885  *
7886  * parse PEReference declarations
7887  * The entity content is handled directly by pushing it's content as
7888  * a new input stream.
7889  *
7890  * [69] PEReference ::= '%' Name ';'
7891  *
7892  * [ WFC: No Recursion ]
7893  * A parsed entity must not contain a recursive
7894  * reference to itself, either directly or indirectly.
7895  *
7896  * [ WFC: Entity Declared ]
7897  * In a document without any DTD, a document with only an internal DTD
7898  * subset which contains no parameter entity references, or a document
7899  * with "standalone='yes'", ...  ... The declaration of a parameter
7900  * entity must precede any reference to it...
7901  *
7902  * [ VC: Entity Declared ]
7903  * In a document with an external subset or external parameter entities
7904  * with "standalone='no'", ...  ... The declaration of a parameter entity
7905  * must precede any reference to it...
7906  *
7907  * [ WFC: In DTD ]
7908  * Parameter-entity references may only appear in the DTD.
7909  * NOTE: misleading but this is handled.
7910  */
7911 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7912 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7913 {
7914     const xmlChar *name;
7915     xmlEntityPtr entity = NULL;
7916     xmlParserInputPtr input;
7917 
7918     if (RAW != '%')
7919         return;
7920     NEXT;
7921     name = xmlParseName(ctxt);
7922     if (name == NULL) {
7923 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7924 	return;
7925     }
7926     if (xmlParserDebugEntities)
7927 	xmlGenericError(xmlGenericErrorContext,
7928 		"PEReference: %s\n", name);
7929     if (RAW != ';') {
7930 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7931         return;
7932     }
7933 
7934     NEXT;
7935 
7936     /*
7937      * Increase the number of entity references parsed
7938      */
7939     ctxt->nbentities++;
7940 
7941     /*
7942      * Request the entity from SAX
7943      */
7944     if ((ctxt->sax != NULL) &&
7945 	(ctxt->sax->getParameterEntity != NULL))
7946 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7947     if (ctxt->instate == XML_PARSER_EOF)
7948 	return;
7949     if (entity == NULL) {
7950 	/*
7951 	 * [ WFC: Entity Declared ]
7952 	 * In a document without any DTD, a document with only an
7953 	 * internal DTD subset which contains no parameter entity
7954 	 * references, or a document with "standalone='yes'", ...
7955 	 * ... The declaration of a parameter entity must precede
7956 	 * any reference to it...
7957 	 */
7958 	if ((ctxt->standalone == 1) ||
7959 	    ((ctxt->hasExternalSubset == 0) &&
7960 	     (ctxt->hasPErefs == 0))) {
7961 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7962 			      "PEReference: %%%s; not found\n",
7963 			      name);
7964 	} else {
7965 	    /*
7966 	     * [ VC: Entity Declared ]
7967 	     * In a document with an external subset or external
7968 	     * parameter entities with "standalone='no'", ...
7969 	     * ... The declaration of a parameter entity must
7970 	     * precede any reference to it...
7971 	     */
7972             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7973                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7974                                  "PEReference: %%%s; not found\n",
7975                                  name, NULL);
7976             } else
7977                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7978                               "PEReference: %%%s; not found\n",
7979                               name, NULL);
7980             ctxt->valid = 0;
7981 	}
7982 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7983     } else {
7984 	/*
7985 	 * Internal checking in case the entity quest barfed
7986 	 */
7987 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7988 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7989 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7990 		  "Internal: %%%s; is not a parameter entity\n",
7991 			  name, NULL);
7992 	} else {
7993             xmlChar start[4];
7994             xmlCharEncoding enc;
7995 
7996 	    if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7997 	        return;
7998 
7999 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8000 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8001 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8002 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8003 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8004 		(ctxt->replaceEntities == 0) &&
8005 		(ctxt->validate == 0))
8006 		return;
8007 
8008 	    input = xmlNewEntityInputStream(ctxt, entity);
8009 	    if (xmlPushInput(ctxt, input) < 0) {
8010                 xmlFreeInputStream(input);
8011 		return;
8012             }
8013 
8014 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8015                 /*
8016                  * Get the 4 first bytes and decode the charset
8017                  * if enc != XML_CHAR_ENCODING_NONE
8018                  * plug some encoding conversion routines.
8019                  * Note that, since we may have some non-UTF8
8020                  * encoding (like UTF16, bug 135229), the 'length'
8021                  * is not known, but we can calculate based upon
8022                  * the amount of data in the buffer.
8023                  */
8024                 GROW
8025                 if (ctxt->instate == XML_PARSER_EOF)
8026                     return;
8027                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8028                     start[0] = RAW;
8029                     start[1] = NXT(1);
8030                     start[2] = NXT(2);
8031                     start[3] = NXT(3);
8032                     enc = xmlDetectCharEncoding(start, 4);
8033                     if (enc != XML_CHAR_ENCODING_NONE) {
8034                         xmlSwitchEncoding(ctxt, enc);
8035                     }
8036                 }
8037 
8038                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8039                     (IS_BLANK_CH(NXT(5)))) {
8040                     xmlParseTextDecl(ctxt);
8041                 }
8042             }
8043 	}
8044     }
8045     ctxt->hasPErefs = 1;
8046 }
8047 
8048 /**
8049  * xmlLoadEntityContent:
8050  * @ctxt:  an XML parser context
8051  * @entity: an unloaded system entity
8052  *
8053  * Load the original content of the given system entity from the
8054  * ExternalID/SystemID given. This is to be used for Included in Literal
8055  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056  *
8057  * Returns 0 in case of success and -1 in case of failure
8058  */
8059 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8060 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061     xmlParserInputPtr input;
8062     xmlBufferPtr buf;
8063     int l, c;
8064     int count = 0;
8065 
8066     if ((ctxt == NULL) || (entity == NULL) ||
8067         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069 	(entity->content != NULL)) {
8070 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071 	            "xmlLoadEntityContent parameter error");
8072         return(-1);
8073     }
8074 
8075     if (xmlParserDebugEntities)
8076 	xmlGenericError(xmlGenericErrorContext,
8077 		"Reading %s entity content input\n", entity->name);
8078 
8079     buf = xmlBufferCreate();
8080     if (buf == NULL) {
8081 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 	            "xmlLoadEntityContent parameter error");
8083         return(-1);
8084     }
8085     xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8086 
8087     input = xmlNewEntityInputStream(ctxt, entity);
8088     if (input == NULL) {
8089 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8090 	            "xmlLoadEntityContent input error");
8091 	xmlBufferFree(buf);
8092         return(-1);
8093     }
8094 
8095     /*
8096      * Push the entity as the current input, read char by char
8097      * saving to the buffer until the end of the entity or an error
8098      */
8099     if (xmlPushInput(ctxt, input) < 0) {
8100         xmlBufferFree(buf);
8101 	xmlFreeInputStream(input);
8102 	return(-1);
8103     }
8104 
8105     GROW;
8106     c = CUR_CHAR(l);
8107     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8108            (IS_CHAR(c))) {
8109         xmlBufferAdd(buf, ctxt->input->cur, l);
8110 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8111 	    count = 0;
8112 	    GROW;
8113             if (ctxt->instate == XML_PARSER_EOF) {
8114                 xmlBufferFree(buf);
8115                 return(-1);
8116             }
8117 	}
8118 	NEXTL(l);
8119 	c = CUR_CHAR(l);
8120 	if (c == 0) {
8121 	    count = 0;
8122 	    GROW;
8123             if (ctxt->instate == XML_PARSER_EOF) {
8124                 xmlBufferFree(buf);
8125                 return(-1);
8126             }
8127 	    c = CUR_CHAR(l);
8128 	}
8129     }
8130 
8131     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8132         xmlPopInput(ctxt);
8133     } else if (!IS_CHAR(c)) {
8134         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8135                           "xmlLoadEntityContent: invalid char value %d\n",
8136 	                  c);
8137 	xmlBufferFree(buf);
8138 	return(-1);
8139     }
8140     entity->content = buf->content;
8141     buf->content = NULL;
8142     xmlBufferFree(buf);
8143 
8144     return(0);
8145 }
8146 
8147 /**
8148  * xmlParseStringPEReference:
8149  * @ctxt:  an XML parser context
8150  * @str:  a pointer to an index in the string
8151  *
8152  * parse PEReference declarations
8153  *
8154  * [69] PEReference ::= '%' Name ';'
8155  *
8156  * [ WFC: No Recursion ]
8157  * A parsed entity must not contain a recursive
8158  * reference to itself, either directly or indirectly.
8159  *
8160  * [ WFC: Entity Declared ]
8161  * In a document without any DTD, a document with only an internal DTD
8162  * subset which contains no parameter entity references, or a document
8163  * with "standalone='yes'", ...  ... The declaration of a parameter
8164  * entity must precede any reference to it...
8165  *
8166  * [ VC: Entity Declared ]
8167  * In a document with an external subset or external parameter entities
8168  * with "standalone='no'", ...  ... The declaration of a parameter entity
8169  * must precede any reference to it...
8170  *
8171  * [ WFC: In DTD ]
8172  * Parameter-entity references may only appear in the DTD.
8173  * NOTE: misleading but this is handled.
8174  *
8175  * Returns the string of the entity content.
8176  *         str is updated to the current value of the index
8177  */
8178 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8179 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8180     const xmlChar *ptr;
8181     xmlChar cur;
8182     xmlChar *name;
8183     xmlEntityPtr entity = NULL;
8184 
8185     if ((str == NULL) || (*str == NULL)) return(NULL);
8186     ptr = *str;
8187     cur = *ptr;
8188     if (cur != '%')
8189         return(NULL);
8190     ptr++;
8191     name = xmlParseStringName(ctxt, &ptr);
8192     if (name == NULL) {
8193 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194 		       "xmlParseStringPEReference: no name\n");
8195 	*str = ptr;
8196 	return(NULL);
8197     }
8198     cur = *ptr;
8199     if (cur != ';') {
8200 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8201 	xmlFree(name);
8202 	*str = ptr;
8203 	return(NULL);
8204     }
8205     ptr++;
8206 
8207     /*
8208      * Increase the number of entity references parsed
8209      */
8210     ctxt->nbentities++;
8211 
8212     /*
8213      * Request the entity from SAX
8214      */
8215     if ((ctxt->sax != NULL) &&
8216 	(ctxt->sax->getParameterEntity != NULL))
8217 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8218     if (ctxt->instate == XML_PARSER_EOF) {
8219 	xmlFree(name);
8220 	*str = ptr;
8221 	return(NULL);
8222     }
8223     if (entity == NULL) {
8224 	/*
8225 	 * [ WFC: Entity Declared ]
8226 	 * In a document without any DTD, a document with only an
8227 	 * internal DTD subset which contains no parameter entity
8228 	 * references, or a document with "standalone='yes'", ...
8229 	 * ... The declaration of a parameter entity must precede
8230 	 * any reference to it...
8231 	 */
8232 	if ((ctxt->standalone == 1) ||
8233 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8234 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8235 		 "PEReference: %%%s; not found\n", name);
8236 	} else {
8237 	    /*
8238 	     * [ VC: Entity Declared ]
8239 	     * In a document with an external subset or external
8240 	     * parameter entities with "standalone='no'", ...
8241 	     * ... The declaration of a parameter entity must
8242 	     * precede any reference to it...
8243 	     */
8244 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8245 			  "PEReference: %%%s; not found\n",
8246 			  name, NULL);
8247 	    ctxt->valid = 0;
8248 	}
8249 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8250     } else {
8251 	/*
8252 	 * Internal checking in case the entity quest barfed
8253 	 */
8254 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8255 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8256 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8257 			  "%%%s; is not a parameter entity\n",
8258 			  name, NULL);
8259 	}
8260     }
8261     ctxt->hasPErefs = 1;
8262     xmlFree(name);
8263     *str = ptr;
8264     return(entity);
8265 }
8266 
8267 /**
8268  * xmlParseDocTypeDecl:
8269  * @ctxt:  an XML parser context
8270  *
8271  * parse a DOCTYPE declaration
8272  *
8273  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8274  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8275  *
8276  * [ VC: Root Element Type ]
8277  * The Name in the document type declaration must match the element
8278  * type of the root element.
8279  */
8280 
8281 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8282 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8283     const xmlChar *name = NULL;
8284     xmlChar *ExternalID = NULL;
8285     xmlChar *URI = NULL;
8286 
8287     /*
8288      * We know that '<!DOCTYPE' has been detected.
8289      */
8290     SKIP(9);
8291 
8292     SKIP_BLANKS;
8293 
8294     /*
8295      * Parse the DOCTYPE name.
8296      */
8297     name = xmlParseName(ctxt);
8298     if (name == NULL) {
8299 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8300 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8301     }
8302     ctxt->intSubName = name;
8303 
8304     SKIP_BLANKS;
8305 
8306     /*
8307      * Check for SystemID and ExternalID
8308      */
8309     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8310 
8311     if ((URI != NULL) || (ExternalID != NULL)) {
8312         ctxt->hasExternalSubset = 1;
8313     }
8314     ctxt->extSubURI = URI;
8315     ctxt->extSubSystem = ExternalID;
8316 
8317     SKIP_BLANKS;
8318 
8319     /*
8320      * Create and update the internal subset.
8321      */
8322     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8323 	(!ctxt->disableSAX))
8324 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8325     if (ctxt->instate == XML_PARSER_EOF)
8326 	return;
8327 
8328     /*
8329      * Is there any internal subset declarations ?
8330      * they are handled separately in xmlParseInternalSubset()
8331      */
8332     if (RAW == '[')
8333 	return;
8334 
8335     /*
8336      * We should be at the end of the DOCTYPE declaration.
8337      */
8338     if (RAW != '>') {
8339 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8340     }
8341     NEXT;
8342 }
8343 
8344 /**
8345  * xmlParseInternalSubset:
8346  * @ctxt:  an XML parser context
8347  *
8348  * parse the internal subset declaration
8349  *
8350  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8351  */
8352 
8353 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8354 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8355     /*
8356      * Is there any DTD definition ?
8357      */
8358     if (RAW == '[') {
8359         int baseInputNr = ctxt->inputNr;
8360         ctxt->instate = XML_PARSER_DTD;
8361         NEXT;
8362 	/*
8363 	 * Parse the succession of Markup declarations and
8364 	 * PEReferences.
8365 	 * Subsequence (markupdecl | PEReference | S)*
8366 	 */
8367 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8368                (ctxt->instate != XML_PARSER_EOF)) {
8369 	    int id = ctxt->input->id;
8370 	    unsigned long cons = CUR_CONSUMED;
8371 
8372 	    SKIP_BLANKS;
8373 	    xmlParseMarkupDecl(ctxt);
8374 	    xmlParsePEReference(ctxt);
8375 
8376             /*
8377              * Conditional sections are allowed from external entities included
8378              * by PE References in the internal subset.
8379              */
8380             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8381                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8382                 xmlParseConditionalSections(ctxt);
8383             }
8384 
8385 	    if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8386 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8387 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8388                 if (ctxt->inputNr > baseInputNr)
8389                     xmlPopInput(ctxt);
8390                 else
8391 		    break;
8392 	    }
8393 	}
8394 	if (RAW == ']') {
8395 	    NEXT;
8396 	    SKIP_BLANKS;
8397 	}
8398     }
8399 
8400     /*
8401      * We should be at the end of the DOCTYPE declaration.
8402      */
8403     if (RAW != '>') {
8404 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8405 	return;
8406     }
8407     NEXT;
8408 }
8409 
8410 #ifdef LIBXML_SAX1_ENABLED
8411 /**
8412  * xmlParseAttribute:
8413  * @ctxt:  an XML parser context
8414  * @value:  a xmlChar ** used to store the value of the attribute
8415  *
8416  * parse an attribute
8417  *
8418  * [41] Attribute ::= Name Eq AttValue
8419  *
8420  * [ WFC: No External Entity References ]
8421  * Attribute values cannot contain direct or indirect entity references
8422  * to external entities.
8423  *
8424  * [ WFC: No < in Attribute Values ]
8425  * The replacement text of any entity referred to directly or indirectly in
8426  * an attribute value (other than "&lt;") must not contain a <.
8427  *
8428  * [ VC: Attribute Value Type ]
8429  * The attribute must have been declared; the value must be of the type
8430  * declared for it.
8431  *
8432  * [25] Eq ::= S? '=' S?
8433  *
8434  * With namespace:
8435  *
8436  * [NS 11] Attribute ::= QName Eq AttValue
8437  *
8438  * Also the case QName == xmlns:??? is handled independently as a namespace
8439  * definition.
8440  *
8441  * Returns the attribute name, and the value in *value.
8442  */
8443 
8444 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8445 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8446     const xmlChar *name;
8447     xmlChar *val;
8448 
8449     *value = NULL;
8450     GROW;
8451     name = xmlParseName(ctxt);
8452     if (name == NULL) {
8453 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8454 	               "error parsing attribute name\n");
8455         return(NULL);
8456     }
8457 
8458     /*
8459      * read the value
8460      */
8461     SKIP_BLANKS;
8462     if (RAW == '=') {
8463         NEXT;
8464 	SKIP_BLANKS;
8465 	val = xmlParseAttValue(ctxt);
8466 	ctxt->instate = XML_PARSER_CONTENT;
8467     } else {
8468 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8469 	       "Specification mandates value for attribute %s\n", name);
8470 	return(NULL);
8471     }
8472 
8473     /*
8474      * Check that xml:lang conforms to the specification
8475      * No more registered as an error, just generate a warning now
8476      * since this was deprecated in XML second edition
8477      */
8478     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8479 	if (!xmlCheckLanguageID(val)) {
8480 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8481 		          "Malformed value for xml:lang : %s\n",
8482 			  val, NULL);
8483 	}
8484     }
8485 
8486     /*
8487      * Check that xml:space conforms to the specification
8488      */
8489     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8490 	if (xmlStrEqual(val, BAD_CAST "default"))
8491 	    *(ctxt->space) = 0;
8492 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8493 	    *(ctxt->space) = 1;
8494 	else {
8495 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8496 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8497                                  val, NULL);
8498 	}
8499     }
8500 
8501     *value = val;
8502     return(name);
8503 }
8504 
8505 /**
8506  * xmlParseStartTag:
8507  * @ctxt:  an XML parser context
8508  *
8509  * parse a start of tag either for rule element or
8510  * EmptyElement. In both case we don't parse the tag closing chars.
8511  *
8512  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8513  *
8514  * [ WFC: Unique Att Spec ]
8515  * No attribute name may appear more than once in the same start-tag or
8516  * empty-element tag.
8517  *
8518  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8519  *
8520  * [ WFC: Unique Att Spec ]
8521  * No attribute name may appear more than once in the same start-tag or
8522  * empty-element tag.
8523  *
8524  * With namespace:
8525  *
8526  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8527  *
8528  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8529  *
8530  * Returns the element name parsed
8531  */
8532 
8533 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8534 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8535     const xmlChar *name;
8536     const xmlChar *attname;
8537     xmlChar *attvalue;
8538     const xmlChar **atts = ctxt->atts;
8539     int nbatts = 0;
8540     int maxatts = ctxt->maxatts;
8541     int i;
8542 
8543     if (RAW != '<') return(NULL);
8544     NEXT1;
8545 
8546     name = xmlParseName(ctxt);
8547     if (name == NULL) {
8548 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8549 	     "xmlParseStartTag: invalid element name\n");
8550         return(NULL);
8551     }
8552 
8553     /*
8554      * Now parse the attributes, it ends up with the ending
8555      *
8556      * (S Attribute)* S?
8557      */
8558     SKIP_BLANKS;
8559     GROW;
8560 
8561     while (((RAW != '>') &&
8562 	   ((RAW != '/') || (NXT(1) != '>')) &&
8563 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8564         int id = ctxt->input->id;
8565 	unsigned long cons = CUR_CONSUMED;
8566 
8567 	attname = xmlParseAttribute(ctxt, &attvalue);
8568         if ((attname != NULL) && (attvalue != NULL)) {
8569 	    /*
8570 	     * [ WFC: Unique Att Spec ]
8571 	     * No attribute name may appear more than once in the same
8572 	     * start-tag or empty-element tag.
8573 	     */
8574 	    for (i = 0; i < nbatts;i += 2) {
8575 	        if (xmlStrEqual(atts[i], attname)) {
8576 		    xmlErrAttributeDup(ctxt, NULL, attname);
8577 		    xmlFree(attvalue);
8578 		    goto failed;
8579 		}
8580 	    }
8581 	    /*
8582 	     * Add the pair to atts
8583 	     */
8584 	    if (atts == NULL) {
8585 	        maxatts = 22; /* allow for 10 attrs by default */
8586 	        atts = (const xmlChar **)
8587 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8588 		if (atts == NULL) {
8589 		    xmlErrMemory(ctxt, NULL);
8590 		    if (attvalue != NULL)
8591 			xmlFree(attvalue);
8592 		    goto failed;
8593 		}
8594 		ctxt->atts = atts;
8595 		ctxt->maxatts = maxatts;
8596 	    } else if (nbatts + 4 > maxatts) {
8597 	        const xmlChar **n;
8598 
8599 	        maxatts *= 2;
8600 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8601 					     maxatts * sizeof(const xmlChar *));
8602 		if (n == NULL) {
8603 		    xmlErrMemory(ctxt, NULL);
8604 		    if (attvalue != NULL)
8605 			xmlFree(attvalue);
8606 		    goto failed;
8607 		}
8608 		atts = n;
8609 		ctxt->atts = atts;
8610 		ctxt->maxatts = maxatts;
8611 	    }
8612 	    atts[nbatts++] = attname;
8613 	    atts[nbatts++] = attvalue;
8614 	    atts[nbatts] = NULL;
8615 	    atts[nbatts + 1] = NULL;
8616 	} else {
8617 	    if (attvalue != NULL)
8618 		xmlFree(attvalue);
8619 	}
8620 
8621 failed:
8622 
8623 	GROW
8624 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8625 	    break;
8626 	if (SKIP_BLANKS == 0) {
8627 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8628 			   "attributes construct error\n");
8629 	}
8630         if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8631             (attname == NULL) && (attvalue == NULL)) {
8632 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8633 			   "xmlParseStartTag: problem parsing attributes\n");
8634 	    break;
8635 	}
8636 	SHRINK;
8637         GROW;
8638     }
8639 
8640     /*
8641      * SAX: Start of Element !
8642      */
8643     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8644 	(!ctxt->disableSAX)) {
8645 	if (nbatts > 0)
8646 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8647 	else
8648 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8649     }
8650 
8651     if (atts != NULL) {
8652         /* Free only the content strings */
8653         for (i = 1;i < nbatts;i+=2)
8654 	    if (atts[i] != NULL)
8655 	       xmlFree((xmlChar *) atts[i]);
8656     }
8657     return(name);
8658 }
8659 
8660 /**
8661  * xmlParseEndTag1:
8662  * @ctxt:  an XML parser context
8663  * @line:  line of the start tag
8664  * @nsNr:  number of namespaces on the start tag
8665  *
8666  * parse an end of tag
8667  *
8668  * [42] ETag ::= '</' Name S? '>'
8669  *
8670  * With namespace
8671  *
8672  * [NS 9] ETag ::= '</' QName S? '>'
8673  */
8674 
8675 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8676 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8677     const xmlChar *name;
8678 
8679     GROW;
8680     if ((RAW != '<') || (NXT(1) != '/')) {
8681 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8682 		       "xmlParseEndTag: '</' not found\n");
8683 	return;
8684     }
8685     SKIP(2);
8686 
8687     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8688 
8689     /*
8690      * We should definitely be at the ending "S? '>'" part
8691      */
8692     GROW;
8693     SKIP_BLANKS;
8694     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8695 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8696     } else
8697 	NEXT1;
8698 
8699     /*
8700      * [ WFC: Element Type Match ]
8701      * The Name in an element's end-tag must match the element type in the
8702      * start-tag.
8703      *
8704      */
8705     if (name != (xmlChar*)1) {
8706         if (name == NULL) name = BAD_CAST "unparsable";
8707         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8708 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8709 		                ctxt->name, line, name);
8710     }
8711 
8712     /*
8713      * SAX: End of Tag
8714      */
8715     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8716 	(!ctxt->disableSAX))
8717         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8718 
8719     namePop(ctxt);
8720     spacePop(ctxt);
8721     return;
8722 }
8723 
8724 /**
8725  * xmlParseEndTag:
8726  * @ctxt:  an XML parser context
8727  *
8728  * parse an end of tag
8729  *
8730  * [42] ETag ::= '</' Name S? '>'
8731  *
8732  * With namespace
8733  *
8734  * [NS 9] ETag ::= '</' QName S? '>'
8735  */
8736 
8737 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8738 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8739     xmlParseEndTag1(ctxt, 0);
8740 }
8741 #endif /* LIBXML_SAX1_ENABLED */
8742 
8743 /************************************************************************
8744  *									*
8745  *		      SAX 2 specific operations				*
8746  *									*
8747  ************************************************************************/
8748 
8749 /*
8750  * xmlGetNamespace:
8751  * @ctxt:  an XML parser context
8752  * @prefix:  the prefix to lookup
8753  *
8754  * Lookup the namespace name for the @prefix (which ca be NULL)
8755  * The prefix must come from the @ctxt->dict dictionary
8756  *
8757  * Returns the namespace name or NULL if not bound
8758  */
8759 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8760 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8761     int i;
8762 
8763     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8764     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8765         if (ctxt->nsTab[i] == prefix) {
8766 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8767 	        return(NULL);
8768 	    return(ctxt->nsTab[i + 1]);
8769 	}
8770     return(NULL);
8771 }
8772 
8773 /**
8774  * xmlParseQName:
8775  * @ctxt:  an XML parser context
8776  * @prefix:  pointer to store the prefix part
8777  *
8778  * parse an XML Namespace QName
8779  *
8780  * [6]  QName  ::= (Prefix ':')? LocalPart
8781  * [7]  Prefix  ::= NCName
8782  * [8]  LocalPart  ::= NCName
8783  *
8784  * Returns the Name parsed or NULL
8785  */
8786 
8787 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8788 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8789     const xmlChar *l, *p;
8790 
8791     GROW;
8792 
8793     l = xmlParseNCName(ctxt);
8794     if (l == NULL) {
8795         if (CUR == ':') {
8796 	    l = xmlParseName(ctxt);
8797 	    if (l != NULL) {
8798 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8800 		*prefix = NULL;
8801 		return(l);
8802 	    }
8803 	}
8804         return(NULL);
8805     }
8806     if (CUR == ':') {
8807         NEXT;
8808 	p = l;
8809 	l = xmlParseNCName(ctxt);
8810 	if (l == NULL) {
8811 	    xmlChar *tmp;
8812 
8813             if (ctxt->instate == XML_PARSER_EOF)
8814                 return(NULL);
8815             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8816 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8817 	    l = xmlParseNmtoken(ctxt);
8818 	    if (l == NULL) {
8819                 if (ctxt->instate == XML_PARSER_EOF)
8820                     return(NULL);
8821 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8822             } else {
8823 		tmp = xmlBuildQName(l, p, NULL, 0);
8824 		xmlFree((char *)l);
8825 	    }
8826 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8827 	    if (tmp != NULL) xmlFree(tmp);
8828 	    *prefix = NULL;
8829 	    return(p);
8830 	}
8831 	if (CUR == ':') {
8832 	    xmlChar *tmp;
8833 
8834             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8835 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8836 	    NEXT;
8837 	    tmp = (xmlChar *) xmlParseName(ctxt);
8838 	    if (tmp != NULL) {
8839 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8840 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8841 		if (tmp != NULL) xmlFree(tmp);
8842 		*prefix = p;
8843 		return(l);
8844 	    }
8845             if (ctxt->instate == XML_PARSER_EOF)
8846                 return(NULL);
8847 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8848 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8849 	    if (tmp != NULL) xmlFree(tmp);
8850 	    *prefix = p;
8851 	    return(l);
8852 	}
8853 	*prefix = p;
8854     } else
8855         *prefix = NULL;
8856     return(l);
8857 }
8858 
8859 /**
8860  * xmlParseQNameAndCompare:
8861  * @ctxt:  an XML parser context
8862  * @name:  the localname
8863  * @prefix:  the prefix, if any.
8864  *
8865  * parse an XML name and compares for match
8866  * (specialized for endtag parsing)
8867  *
8868  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8869  * and the name for mismatch
8870  */
8871 
8872 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8873 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8874                         xmlChar const *prefix) {
8875     const xmlChar *cmp;
8876     const xmlChar *in;
8877     const xmlChar *ret;
8878     const xmlChar *prefix2;
8879 
8880     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8881 
8882     GROW;
8883     in = ctxt->input->cur;
8884 
8885     cmp = prefix;
8886     while (*in != 0 && *in == *cmp) {
8887 	++in;
8888 	++cmp;
8889     }
8890     if ((*cmp == 0) && (*in == ':')) {
8891         in++;
8892 	cmp = name;
8893 	while (*in != 0 && *in == *cmp) {
8894 	    ++in;
8895 	    ++cmp;
8896 	}
8897 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8898 	    /* success */
8899             ctxt->input->col += in - ctxt->input->cur;
8900 	    ctxt->input->cur = in;
8901 	    return((const xmlChar*) 1);
8902 	}
8903     }
8904     /*
8905      * all strings coms from the dictionary, equality can be done directly
8906      */
8907     ret = xmlParseQName (ctxt, &prefix2);
8908     if ((ret == name) && (prefix == prefix2))
8909 	return((const xmlChar*) 1);
8910     return ret;
8911 }
8912 
8913 /**
8914  * xmlParseAttValueInternal:
8915  * @ctxt:  an XML parser context
8916  * @len:  attribute len result
8917  * @alloc:  whether the attribute was reallocated as a new string
8918  * @normalize:  if 1 then further non-CDATA normalization must be done
8919  *
8920  * parse a value for an attribute.
8921  * NOTE: if no normalization is needed, the routine will return pointers
8922  *       directly from the data buffer.
8923  *
8924  * 3.3.3 Attribute-Value Normalization:
8925  * Before the value of an attribute is passed to the application or
8926  * checked for validity, the XML processor must normalize it as follows:
8927  * - a character reference is processed by appending the referenced
8928  *   character to the attribute value
8929  * - an entity reference is processed by recursively processing the
8930  *   replacement text of the entity
8931  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8932  *   appending #x20 to the normalized value, except that only a single
8933  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8934  *   parsed entity or the literal entity value of an internal parsed entity
8935  * - other characters are processed by appending them to the normalized value
8936  * If the declared value is not CDATA, then the XML processor must further
8937  * process the normalized attribute value by discarding any leading and
8938  * trailing space (#x20) characters, and by replacing sequences of space
8939  * (#x20) characters by a single space (#x20) character.
8940  * All attributes for which no declaration has been read should be treated
8941  * by a non-validating parser as if declared CDATA.
8942  *
8943  * Returns the AttValue parsed or NULL. The value has to be freed by the
8944  *     caller if it was copied, this can be detected by val[*len] == 0.
8945  */
8946 
8947 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8948     const xmlChar *oldbase = ctxt->input->base;\
8949     GROW;\
8950     if (ctxt->instate == XML_PARSER_EOF)\
8951         return(NULL);\
8952     if (oldbase != ctxt->input->base) {\
8953         ptrdiff_t delta = ctxt->input->base - oldbase;\
8954         start = start + delta;\
8955         in = in + delta;\
8956     }\
8957     end = ctxt->input->end;
8958 
8959 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8960 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8961                          int normalize)
8962 {
8963     xmlChar limit = 0;
8964     const xmlChar *in = NULL, *start, *end, *last;
8965     xmlChar *ret = NULL;
8966     int line, col;
8967     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8968                     XML_MAX_HUGE_LENGTH :
8969                     XML_MAX_TEXT_LENGTH;
8970 
8971     GROW;
8972     in = (xmlChar *) CUR_PTR;
8973     line = ctxt->input->line;
8974     col = ctxt->input->col;
8975     if (*in != '"' && *in != '\'') {
8976         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8977         return (NULL);
8978     }
8979     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8980 
8981     /*
8982      * try to handle in this routine the most common case where no
8983      * allocation of a new string is required and where content is
8984      * pure ASCII.
8985      */
8986     limit = *in++;
8987     col++;
8988     end = ctxt->input->end;
8989     start = in;
8990     if (in >= end) {
8991         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992     }
8993     if (normalize) {
8994         /*
8995 	 * Skip any leading spaces
8996 	 */
8997 	while ((in < end) && (*in != limit) &&
8998 	       ((*in == 0x20) || (*in == 0x9) ||
8999 	        (*in == 0xA) || (*in == 0xD))) {
9000 	    if (*in == 0xA) {
9001 	        line++; col = 1;
9002 	    } else {
9003 	        col++;
9004 	    }
9005 	    in++;
9006 	    start = in;
9007 	    if (in >= end) {
9008                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9009                 if ((in - start) > maxLength) {
9010                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9011                                    "AttValue length too long\n");
9012                     return(NULL);
9013                 }
9014 	    }
9015 	}
9016 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9017 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9018 	    col++;
9019 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9020 	    if (in >= end) {
9021                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9022                 if ((in - start) > maxLength) {
9023                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9024                                    "AttValue length too long\n");
9025                     return(NULL);
9026                 }
9027 	    }
9028 	}
9029 	last = in;
9030 	/*
9031 	 * skip the trailing blanks
9032 	 */
9033 	while ((last[-1] == 0x20) && (last > start)) last--;
9034 	while ((in < end) && (*in != limit) &&
9035 	       ((*in == 0x20) || (*in == 0x9) ||
9036 	        (*in == 0xA) || (*in == 0xD))) {
9037 	    if (*in == 0xA) {
9038 	        line++, col = 1;
9039 	    } else {
9040 	        col++;
9041 	    }
9042 	    in++;
9043 	    if (in >= end) {
9044 		const xmlChar *oldbase = ctxt->input->base;
9045 		GROW;
9046                 if (ctxt->instate == XML_PARSER_EOF)
9047                     return(NULL);
9048 		if (oldbase != ctxt->input->base) {
9049 		    ptrdiff_t delta = ctxt->input->base - oldbase;
9050 		    start = start + delta;
9051 		    in = in + delta;
9052 		    last = last + delta;
9053 		}
9054 		end = ctxt->input->end;
9055                 if ((in - start) > maxLength) {
9056                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9057                                    "AttValue length too long\n");
9058                     return(NULL);
9059                 }
9060 	    }
9061 	}
9062         if ((in - start) > maxLength) {
9063             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064                            "AttValue length too long\n");
9065             return(NULL);
9066         }
9067 	if (*in != limit) goto need_complex;
9068     } else {
9069 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9070 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9071 	    in++;
9072 	    col++;
9073 	    if (in >= end) {
9074                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9075                 if ((in - start) > maxLength) {
9076                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9077                                    "AttValue length too long\n");
9078                     return(NULL);
9079                 }
9080 	    }
9081 	}
9082 	last = in;
9083         if ((in - start) > maxLength) {
9084             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9085                            "AttValue length too long\n");
9086             return(NULL);
9087         }
9088 	if (*in != limit) goto need_complex;
9089     }
9090     in++;
9091     col++;
9092     if (len != NULL) {
9093         *len = last - start;
9094         ret = (xmlChar *) start;
9095     } else {
9096         if (alloc) *alloc = 1;
9097         ret = xmlStrndup(start, last - start);
9098     }
9099     CUR_PTR = in;
9100     ctxt->input->line = line;
9101     ctxt->input->col = col;
9102     if (alloc) *alloc = 0;
9103     return ret;
9104 need_complex:
9105     if (alloc) *alloc = 1;
9106     return xmlParseAttValueComplex(ctxt, len, normalize);
9107 }
9108 
9109 /**
9110  * xmlParseAttribute2:
9111  * @ctxt:  an XML parser context
9112  * @pref:  the element prefix
9113  * @elem:  the element name
9114  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9115  * @value:  a xmlChar ** used to store the value of the attribute
9116  * @len:  an int * to save the length of the attribute
9117  * @alloc:  an int * to indicate if the attribute was allocated
9118  *
9119  * parse an attribute in the new SAX2 framework.
9120  *
9121  * Returns the attribute name, and the value in *value, .
9122  */
9123 
9124 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9125 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9126                    const xmlChar * pref, const xmlChar * elem,
9127                    const xmlChar ** prefix, xmlChar ** value,
9128                    int *len, int *alloc)
9129 {
9130     const xmlChar *name;
9131     xmlChar *val, *internal_val = NULL;
9132     int normalize = 0;
9133 
9134     *value = NULL;
9135     GROW;
9136     name = xmlParseQName(ctxt, prefix);
9137     if (name == NULL) {
9138         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9139                        "error parsing attribute name\n");
9140         return (NULL);
9141     }
9142 
9143     /*
9144      * get the type if needed
9145      */
9146     if (ctxt->attsSpecial != NULL) {
9147         int type;
9148 
9149         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9150                                                  pref, elem, *prefix, name);
9151         if (type != 0)
9152             normalize = 1;
9153     }
9154 
9155     /*
9156      * read the value
9157      */
9158     SKIP_BLANKS;
9159     if (RAW == '=') {
9160         NEXT;
9161         SKIP_BLANKS;
9162         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9163 	if (normalize) {
9164 	    /*
9165 	     * Sometimes a second normalisation pass for spaces is needed
9166 	     * but that only happens if charrefs or entities references
9167 	     * have been used in the attribute value, i.e. the attribute
9168 	     * value have been extracted in an allocated string already.
9169 	     */
9170 	    if (*alloc) {
9171 	        const xmlChar *val2;
9172 
9173 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9174 		if ((val2 != NULL) && (val2 != val)) {
9175 		    xmlFree(val);
9176 		    val = (xmlChar *) val2;
9177 		}
9178 	    }
9179 	}
9180         ctxt->instate = XML_PARSER_CONTENT;
9181     } else {
9182         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9183                           "Specification mandates value for attribute %s\n",
9184                           name);
9185         return (NULL);
9186     }
9187 
9188     if (*prefix == ctxt->str_xml) {
9189         /*
9190          * Check that xml:lang conforms to the specification
9191          * No more registered as an error, just generate a warning now
9192          * since this was deprecated in XML second edition
9193          */
9194         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9195             internal_val = xmlStrndup(val, *len);
9196             if (!xmlCheckLanguageID(internal_val)) {
9197                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9198                               "Malformed value for xml:lang : %s\n",
9199                               internal_val, NULL);
9200             }
9201         }
9202 
9203         /*
9204          * Check that xml:space conforms to the specification
9205          */
9206         if (xmlStrEqual(name, BAD_CAST "space")) {
9207             internal_val = xmlStrndup(val, *len);
9208             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9209                 *(ctxt->space) = 0;
9210             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9211                 *(ctxt->space) = 1;
9212             else {
9213                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9214                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215                               internal_val, NULL);
9216             }
9217         }
9218         if (internal_val) {
9219             xmlFree(internal_val);
9220         }
9221     }
9222 
9223     *value = val;
9224     return (name);
9225 }
9226 /**
9227  * xmlParseStartTag2:
9228  * @ctxt:  an XML parser context
9229  *
9230  * parse a start of tag either for rule element or
9231  * EmptyElement. In both case we don't parse the tag closing chars.
9232  * This routine is called when running SAX2 parsing
9233  *
9234  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9235  *
9236  * [ WFC: Unique Att Spec ]
9237  * No attribute name may appear more than once in the same start-tag or
9238  * empty-element tag.
9239  *
9240  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9241  *
9242  * [ WFC: Unique Att Spec ]
9243  * No attribute name may appear more than once in the same start-tag or
9244  * empty-element tag.
9245  *
9246  * With namespace:
9247  *
9248  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9249  *
9250  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9251  *
9252  * Returns the element name parsed
9253  */
9254 
9255 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9256 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9257                   const xmlChar **URI, int *tlen) {
9258     const xmlChar *localname;
9259     const xmlChar *prefix;
9260     const xmlChar *attname;
9261     const xmlChar *aprefix;
9262     const xmlChar *nsname;
9263     xmlChar *attvalue;
9264     const xmlChar **atts = ctxt->atts;
9265     int maxatts = ctxt->maxatts;
9266     int nratts, nbatts, nbdef, inputid;
9267     int i, j, nbNs, attval;
9268     unsigned long cur;
9269     int nsNr = ctxt->nsNr;
9270 
9271     if (RAW != '<') return(NULL);
9272     NEXT1;
9273 
9274     /*
9275      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276      *       point since the attribute values may be stored as pointers to
9277      *       the buffer and calling SHRINK would destroy them !
9278      *       The Shrinking is only possible once the full set of attribute
9279      *       callbacks have been done.
9280      */
9281     SHRINK;
9282     cur = ctxt->input->cur - ctxt->input->base;
9283     inputid = ctxt->input->id;
9284     nbatts = 0;
9285     nratts = 0;
9286     nbdef = 0;
9287     nbNs = 0;
9288     attval = 0;
9289     /* Forget any namespaces added during an earlier parse of this element. */
9290     ctxt->nsNr = nsNr;
9291 
9292     localname = xmlParseQName(ctxt, &prefix);
9293     if (localname == NULL) {
9294 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295 		       "StartTag: invalid element name\n");
9296         return(NULL);
9297     }
9298     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9299 
9300     /*
9301      * Now parse the attributes, it ends up with the ending
9302      *
9303      * (S Attribute)* S?
9304      */
9305     SKIP_BLANKS;
9306     GROW;
9307 
9308     while (((RAW != '>') &&
9309 	   ((RAW != '/') || (NXT(1) != '>')) &&
9310 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9311 	int id = ctxt->input->id;
9312 	unsigned long cons = CUR_CONSUMED;
9313 	int len = -1, alloc = 0;
9314 
9315 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9316 	                             &aprefix, &attvalue, &len, &alloc);
9317         if ((attname == NULL) || (attvalue == NULL))
9318             goto next_attr;
9319 	if (len < 0) len = xmlStrlen(attvalue);
9320 
9321         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9322             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9323             xmlURIPtr uri;
9324 
9325             if (URL == NULL) {
9326                 xmlErrMemory(ctxt, "dictionary allocation failure");
9327                 if ((attvalue != NULL) && (alloc != 0))
9328                     xmlFree(attvalue);
9329                 localname = NULL;
9330                 goto done;
9331             }
9332             if (*URL != 0) {
9333                 uri = xmlParseURI((const char *) URL);
9334                 if (uri == NULL) {
9335                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9336                              "xmlns: '%s' is not a valid URI\n",
9337                                        URL, NULL, NULL);
9338                 } else {
9339                     if (uri->scheme == NULL) {
9340                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9341                                   "xmlns: URI %s is not absolute\n",
9342                                   URL, NULL, NULL);
9343                     }
9344                     xmlFreeURI(uri);
9345                 }
9346                 if (URL == ctxt->str_xml_ns) {
9347                     if (attname != ctxt->str_xml) {
9348                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349                      "xml namespace URI cannot be the default namespace\n",
9350                                  NULL, NULL, NULL);
9351                     }
9352                     goto next_attr;
9353                 }
9354                 if ((len == 29) &&
9355                     (xmlStrEqual(URL,
9356                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9357                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9358                          "reuse of the xmlns namespace name is forbidden\n",
9359                              NULL, NULL, NULL);
9360                     goto next_attr;
9361                 }
9362             }
9363             /*
9364              * check that it's not a defined namespace
9365              */
9366             for (j = 1;j <= nbNs;j++)
9367                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9368                     break;
9369             if (j <= nbNs)
9370                 xmlErrAttributeDup(ctxt, NULL, attname);
9371             else
9372                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9373 
9374         } else if (aprefix == ctxt->str_xmlns) {
9375             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9376             xmlURIPtr uri;
9377 
9378             if (attname == ctxt->str_xml) {
9379                 if (URL != ctxt->str_xml_ns) {
9380                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381                              "xml namespace prefix mapped to wrong URI\n",
9382                              NULL, NULL, NULL);
9383                 }
9384                 /*
9385                  * Do not keep a namespace definition node
9386                  */
9387                 goto next_attr;
9388             }
9389             if (URL == ctxt->str_xml_ns) {
9390                 if (attname != ctxt->str_xml) {
9391                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392                              "xml namespace URI mapped to wrong prefix\n",
9393                              NULL, NULL, NULL);
9394                 }
9395                 goto next_attr;
9396             }
9397             if (attname == ctxt->str_xmlns) {
9398                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399                          "redefinition of the xmlns prefix is forbidden\n",
9400                          NULL, NULL, NULL);
9401                 goto next_attr;
9402             }
9403             if ((len == 29) &&
9404                 (xmlStrEqual(URL,
9405                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407                          "reuse of the xmlns namespace name is forbidden\n",
9408                          NULL, NULL, NULL);
9409                 goto next_attr;
9410             }
9411             if ((URL == NULL) || (URL[0] == 0)) {
9412                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9413                          "xmlns:%s: Empty XML namespace is not allowed\n",
9414                               attname, NULL, NULL);
9415                 goto next_attr;
9416             } else {
9417                 uri = xmlParseURI((const char *) URL);
9418                 if (uri == NULL) {
9419                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9420                          "xmlns:%s: '%s' is not a valid URI\n",
9421                                        attname, URL, NULL);
9422                 } else {
9423                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9424                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9425                                   "xmlns:%s: URI %s is not absolute\n",
9426                                   attname, URL, NULL);
9427                     }
9428                     xmlFreeURI(uri);
9429                 }
9430             }
9431 
9432             /*
9433              * check that it's not a defined namespace
9434              */
9435             for (j = 1;j <= nbNs;j++)
9436                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9437                     break;
9438             if (j <= nbNs)
9439                 xmlErrAttributeDup(ctxt, aprefix, attname);
9440             else
9441                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9442 
9443         } else {
9444             /*
9445              * Add the pair to atts
9446              */
9447             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9448                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9449                     goto next_attr;
9450                 }
9451                 maxatts = ctxt->maxatts;
9452                 atts = ctxt->atts;
9453             }
9454             ctxt->attallocs[nratts++] = alloc;
9455             atts[nbatts++] = attname;
9456             atts[nbatts++] = aprefix;
9457             /*
9458              * The namespace URI field is used temporarily to point at the
9459              * base of the current input buffer for non-alloced attributes.
9460              * When the input buffer is reallocated, all the pointers become
9461              * invalid, but they can be reconstructed later.
9462              */
9463             if (alloc)
9464                 atts[nbatts++] = NULL;
9465             else
9466                 atts[nbatts++] = ctxt->input->base;
9467             atts[nbatts++] = attvalue;
9468             attvalue += len;
9469             atts[nbatts++] = attvalue;
9470             /*
9471              * tag if some deallocation is needed
9472              */
9473             if (alloc != 0) attval = 1;
9474             attvalue = NULL; /* moved into atts */
9475         }
9476 
9477 next_attr:
9478         if ((attvalue != NULL) && (alloc != 0)) {
9479             xmlFree(attvalue);
9480             attvalue = NULL;
9481         }
9482 
9483 	GROW
9484         if (ctxt->instate == XML_PARSER_EOF)
9485             break;
9486 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9487 	    break;
9488 	if (SKIP_BLANKS == 0) {
9489 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9490 			   "attributes construct error\n");
9491 	    break;
9492 	}
9493         if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9494             (attname == NULL) && (attvalue == NULL)) {
9495 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9496 	         "xmlParseStartTag: problem parsing attributes\n");
9497 	    break;
9498 	}
9499         GROW;
9500     }
9501 
9502     if (ctxt->input->id != inputid) {
9503         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9504                     "Unexpected change of input\n");
9505         localname = NULL;
9506         goto done;
9507     }
9508 
9509     /* Reconstruct attribute value pointers. */
9510     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9511         if (atts[i+2] != NULL) {
9512             /*
9513              * Arithmetic on dangling pointers is technically undefined
9514              * behavior, but well...
9515              */
9516             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9517             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9518             atts[i+3] += offset;  /* value */
9519             atts[i+4] += offset;  /* valuend */
9520         }
9521     }
9522 
9523     /*
9524      * The attributes defaulting
9525      */
9526     if (ctxt->attsDefault != NULL) {
9527         xmlDefAttrsPtr defaults;
9528 
9529 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9530 	if (defaults != NULL) {
9531 	    for (i = 0;i < defaults->nbAttrs;i++) {
9532 	        attname = defaults->values[5 * i];
9533 		aprefix = defaults->values[5 * i + 1];
9534 
9535                 /*
9536 		 * special work for namespaces defaulted defs
9537 		 */
9538 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9539 		    /*
9540 		     * check that it's not a defined namespace
9541 		     */
9542 		    for (j = 1;j <= nbNs;j++)
9543 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9544 			    break;
9545 	            if (j <= nbNs) continue;
9546 
9547 		    nsname = xmlGetNamespace(ctxt, NULL);
9548 		    if (nsname != defaults->values[5 * i + 2]) {
9549 			if (nsPush(ctxt, NULL,
9550 			           defaults->values[5 * i + 2]) > 0)
9551 			    nbNs++;
9552 		    }
9553 		} else if (aprefix == ctxt->str_xmlns) {
9554 		    /*
9555 		     * check that it's not a defined namespace
9556 		     */
9557 		    for (j = 1;j <= nbNs;j++)
9558 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9559 			    break;
9560 	            if (j <= nbNs) continue;
9561 
9562 		    nsname = xmlGetNamespace(ctxt, attname);
9563 		    if (nsname != defaults->values[2]) {
9564 			if (nsPush(ctxt, attname,
9565 			           defaults->values[5 * i + 2]) > 0)
9566 			    nbNs++;
9567 		    }
9568 		} else {
9569 		    /*
9570 		     * check that it's not a defined attribute
9571 		     */
9572 		    for (j = 0;j < nbatts;j+=5) {
9573 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9574 			    break;
9575 		    }
9576 		    if (j < nbatts) continue;
9577 
9578 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9579 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9580                             localname = NULL;
9581                             goto done;
9582 			}
9583 			maxatts = ctxt->maxatts;
9584 			atts = ctxt->atts;
9585 		    }
9586 		    atts[nbatts++] = attname;
9587 		    atts[nbatts++] = aprefix;
9588 		    if (aprefix == NULL)
9589 			atts[nbatts++] = NULL;
9590 		    else
9591 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9592 		    atts[nbatts++] = defaults->values[5 * i + 2];
9593 		    atts[nbatts++] = defaults->values[5 * i + 3];
9594 		    if ((ctxt->standalone == 1) &&
9595 		        (defaults->values[5 * i + 4] != NULL)) {
9596 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9597 	  "standalone: attribute %s on %s defaulted from external subset\n",
9598 	                                 attname, localname);
9599 		    }
9600 		    nbdef++;
9601 		}
9602 	    }
9603 	}
9604     }
9605 
9606     /*
9607      * The attributes checkings
9608      */
9609     for (i = 0; i < nbatts;i += 5) {
9610         /*
9611 	* The default namespace does not apply to attribute names.
9612 	*/
9613 	if (atts[i + 1] != NULL) {
9614 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9615 	    if (nsname == NULL) {
9616 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9617 		    "Namespace prefix %s for %s on %s is not defined\n",
9618 		    atts[i + 1], atts[i], localname);
9619 	    }
9620 	    atts[i + 2] = nsname;
9621 	} else
9622 	    nsname = NULL;
9623 	/*
9624 	 * [ WFC: Unique Att Spec ]
9625 	 * No attribute name may appear more than once in the same
9626 	 * start-tag or empty-element tag.
9627 	 * As extended by the Namespace in XML REC.
9628 	 */
9629         for (j = 0; j < i;j += 5) {
9630 	    if (atts[i] == atts[j]) {
9631 	        if (atts[i+1] == atts[j+1]) {
9632 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9633 		    break;
9634 		}
9635 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9636 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9637 			     "Namespaced Attribute %s in '%s' redefined\n",
9638 			     atts[i], nsname, NULL);
9639 		    break;
9640 		}
9641 	    }
9642 	}
9643     }
9644 
9645     nsname = xmlGetNamespace(ctxt, prefix);
9646     if ((prefix != NULL) && (nsname == NULL)) {
9647 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9648 	         "Namespace prefix %s on %s is not defined\n",
9649 		 prefix, localname, NULL);
9650     }
9651     *pref = prefix;
9652     *URI = nsname;
9653 
9654     /*
9655      * SAX: Start of Element !
9656      */
9657     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9658 	(!ctxt->disableSAX)) {
9659 	if (nbNs > 0)
9660 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9661 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9662 			  nbatts / 5, nbdef, atts);
9663 	else
9664 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9665 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9666     }
9667 
9668 done:
9669     /*
9670      * Free up attribute allocated strings if needed
9671      */
9672     if (attval != 0) {
9673 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9674 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9675 	        xmlFree((xmlChar *) atts[i]);
9676     }
9677 
9678     return(localname);
9679 }
9680 
9681 /**
9682  * xmlParseEndTag2:
9683  * @ctxt:  an XML parser context
9684  * @line:  line of the start tag
9685  * @nsNr:  number of namespaces on the start tag
9686  *
9687  * parse an end of tag
9688  *
9689  * [42] ETag ::= '</' Name S? '>'
9690  *
9691  * With namespace
9692  *
9693  * [NS 9] ETag ::= '</' QName S? '>'
9694  */
9695 
9696 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9697 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9698     const xmlChar *name;
9699 
9700     GROW;
9701     if ((RAW != '<') || (NXT(1) != '/')) {
9702 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9703 	return;
9704     }
9705     SKIP(2);
9706 
9707     if (tag->prefix == NULL)
9708         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9709     else
9710         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9711 
9712     /*
9713      * We should definitely be at the ending "S? '>'" part
9714      */
9715     GROW;
9716     if (ctxt->instate == XML_PARSER_EOF)
9717         return;
9718     SKIP_BLANKS;
9719     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9720 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9721     } else
9722 	NEXT1;
9723 
9724     /*
9725      * [ WFC: Element Type Match ]
9726      * The Name in an element's end-tag must match the element type in the
9727      * start-tag.
9728      *
9729      */
9730     if (name != (xmlChar*)1) {
9731         if (name == NULL) name = BAD_CAST "unparsable";
9732         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9733 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9734 		                ctxt->name, tag->line, name);
9735     }
9736 
9737     /*
9738      * SAX: End of Tag
9739      */
9740     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9741 	(!ctxt->disableSAX))
9742 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9743                                 tag->URI);
9744 
9745     spacePop(ctxt);
9746     if (tag->nsNr != 0)
9747 	nsPop(ctxt, tag->nsNr);
9748 }
9749 
9750 /**
9751  * xmlParseCDSect:
9752  * @ctxt:  an XML parser context
9753  *
9754  * Parse escaped pure raw content.
9755  *
9756  * [18] CDSect ::= CDStart CData CDEnd
9757  *
9758  * [19] CDStart ::= '<![CDATA['
9759  *
9760  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9761  *
9762  * [21] CDEnd ::= ']]>'
9763  */
9764 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9765 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9766     xmlChar *buf = NULL;
9767     int len = 0;
9768     int size = XML_PARSER_BUFFER_SIZE;
9769     int r, rl;
9770     int	s, sl;
9771     int cur, l;
9772     int count = 0;
9773     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9774                     XML_MAX_HUGE_LENGTH :
9775                     XML_MAX_TEXT_LENGTH;
9776 
9777     /* Check 2.6.0 was NXT(0) not RAW */
9778     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9779 	SKIP(9);
9780     } else
9781         return;
9782 
9783     ctxt->instate = XML_PARSER_CDATA_SECTION;
9784     r = CUR_CHAR(rl);
9785     if (!IS_CHAR(r)) {
9786 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9787 	ctxt->instate = XML_PARSER_CONTENT;
9788         return;
9789     }
9790     NEXTL(rl);
9791     s = CUR_CHAR(sl);
9792     if (!IS_CHAR(s)) {
9793 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794 	ctxt->instate = XML_PARSER_CONTENT;
9795         return;
9796     }
9797     NEXTL(sl);
9798     cur = CUR_CHAR(l);
9799     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9800     if (buf == NULL) {
9801 	xmlErrMemory(ctxt, NULL);
9802 	return;
9803     }
9804     while (IS_CHAR(cur) &&
9805            ((r != ']') || (s != ']') || (cur != '>'))) {
9806 	if (len + 5 >= size) {
9807 	    xmlChar *tmp;
9808 
9809 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9810 	    if (tmp == NULL) {
9811 	        xmlFree(buf);
9812 		xmlErrMemory(ctxt, NULL);
9813 		return;
9814 	    }
9815 	    buf = tmp;
9816 	    size *= 2;
9817 	}
9818 	COPY_BUF(rl,buf,len,r);
9819 	r = s;
9820 	rl = sl;
9821 	s = cur;
9822 	sl = l;
9823 	count++;
9824 	if (count > 50) {
9825 	    SHRINK;
9826 	    GROW;
9827             if (ctxt->instate == XML_PARSER_EOF) {
9828 		xmlFree(buf);
9829 		return;
9830             }
9831 	    count = 0;
9832 	}
9833 	NEXTL(l);
9834 	cur = CUR_CHAR(l);
9835         if (len > maxLength) {
9836             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837                            "CData section too big found\n");
9838             xmlFree(buf);
9839             return;
9840         }
9841     }
9842     buf[len] = 0;
9843     ctxt->instate = XML_PARSER_CONTENT;
9844     if (cur != '>') {
9845 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9846 	                     "CData section not finished\n%.50s\n", buf);
9847 	xmlFree(buf);
9848         return;
9849     }
9850     NEXTL(l);
9851 
9852     /*
9853      * OK the buffer is to be consumed as cdata.
9854      */
9855     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9856 	if (ctxt->sax->cdataBlock != NULL)
9857 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9858 	else if (ctxt->sax->characters != NULL)
9859 	    ctxt->sax->characters(ctxt->userData, buf, len);
9860     }
9861     xmlFree(buf);
9862 }
9863 
9864 /**
9865  * xmlParseContentInternal:
9866  * @ctxt:  an XML parser context
9867  *
9868  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9869  * unexpected EOF to the caller.
9870  */
9871 
9872 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9873 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9874     int nameNr = ctxt->nameNr;
9875 
9876     GROW;
9877     while ((RAW != 0) &&
9878 	   (ctxt->instate != XML_PARSER_EOF)) {
9879         int id = ctxt->input->id;
9880 	unsigned long cons = CUR_CONSUMED;
9881 	const xmlChar *cur = ctxt->input->cur;
9882 
9883 	/*
9884 	 * First case : a Processing Instruction.
9885 	 */
9886 	if ((*cur == '<') && (cur[1] == '?')) {
9887 	    xmlParsePI(ctxt);
9888 	}
9889 
9890 	/*
9891 	 * Second case : a CDSection
9892 	 */
9893 	/* 2.6.0 test was *cur not RAW */
9894 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9895 	    xmlParseCDSect(ctxt);
9896 	}
9897 
9898 	/*
9899 	 * Third case :  a comment
9900 	 */
9901 	else if ((*cur == '<') && (NXT(1) == '!') &&
9902 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9903 	    xmlParseComment(ctxt);
9904 	    ctxt->instate = XML_PARSER_CONTENT;
9905 	}
9906 
9907 	/*
9908 	 * Fourth case :  a sub-element.
9909 	 */
9910 	else if (*cur == '<') {
9911             if (NXT(1) == '/') {
9912                 if (ctxt->nameNr <= nameNr)
9913                     break;
9914 	        xmlParseElementEnd(ctxt);
9915             } else {
9916 	        xmlParseElementStart(ctxt);
9917             }
9918 	}
9919 
9920 	/*
9921 	 * Fifth case : a reference. If if has not been resolved,
9922 	 *    parsing returns it's Name, create the node
9923 	 */
9924 
9925 	else if (*cur == '&') {
9926 	    xmlParseReference(ctxt);
9927 	}
9928 
9929 	/*
9930 	 * Last case, text. Note that References are handled directly.
9931 	 */
9932 	else {
9933 	    xmlParseCharData(ctxt, 0);
9934 	}
9935 
9936 	GROW;
9937 	SHRINK;
9938 
9939 	if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9940 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9941 	                "detected an error in element content\n");
9942 	    xmlHaltParser(ctxt);
9943             break;
9944 	}
9945     }
9946 }
9947 
9948 /**
9949  * xmlParseContent:
9950  * @ctxt:  an XML parser context
9951  *
9952  * Parse a content sequence. Stops at EOF or '</'.
9953  *
9954  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9955  */
9956 
9957 void
xmlParseContent(xmlParserCtxtPtr ctxt)9958 xmlParseContent(xmlParserCtxtPtr ctxt) {
9959     int nameNr = ctxt->nameNr;
9960 
9961     xmlParseContentInternal(ctxt);
9962 
9963     if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9964         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9965         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9966         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9967                 "Premature end of data in tag %s line %d\n",
9968 		name, line, NULL);
9969     }
9970 }
9971 
9972 /**
9973  * xmlParseElement:
9974  * @ctxt:  an XML parser context
9975  *
9976  * parse an XML element
9977  *
9978  * [39] element ::= EmptyElemTag | STag content ETag
9979  *
9980  * [ WFC: Element Type Match ]
9981  * The Name in an element's end-tag must match the element type in the
9982  * start-tag.
9983  *
9984  */
9985 
9986 void
xmlParseElement(xmlParserCtxtPtr ctxt)9987 xmlParseElement(xmlParserCtxtPtr ctxt) {
9988     if (xmlParseElementStart(ctxt) != 0)
9989         return;
9990 
9991     xmlParseContentInternal(ctxt);
9992     if (ctxt->instate == XML_PARSER_EOF)
9993 	return;
9994 
9995     if (CUR == 0) {
9996         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9997         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9998         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9999                 "Premature end of data in tag %s line %d\n",
10000 		name, line, NULL);
10001         return;
10002     }
10003 
10004     xmlParseElementEnd(ctxt);
10005 }
10006 
10007 /**
10008  * xmlParseElementStart:
10009  * @ctxt:  an XML parser context
10010  *
10011  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10012  * opening tag was parsed, 1 if an empty element was parsed.
10013  */
10014 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10015 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10016     const xmlChar *name;
10017     const xmlChar *prefix = NULL;
10018     const xmlChar *URI = NULL;
10019     xmlParserNodeInfo node_info;
10020     int line, tlen = 0;
10021     xmlNodePtr ret;
10022     int nsNr = ctxt->nsNr;
10023 
10024     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10025         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10026 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10027 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10028 			  xmlParserMaxDepth);
10029 	xmlHaltParser(ctxt);
10030 	return(-1);
10031     }
10032 
10033     /* Capture start position */
10034     if (ctxt->record_info) {
10035         node_info.begin_pos = ctxt->input->consumed +
10036                           (CUR_PTR - ctxt->input->base);
10037 	node_info.begin_line = ctxt->input->line;
10038     }
10039 
10040     if (ctxt->spaceNr == 0)
10041 	spacePush(ctxt, -1);
10042     else if (*ctxt->space == -2)
10043 	spacePush(ctxt, -1);
10044     else
10045 	spacePush(ctxt, *ctxt->space);
10046 
10047     line = ctxt->input->line;
10048 #ifdef LIBXML_SAX1_ENABLED
10049     if (ctxt->sax2)
10050 #endif /* LIBXML_SAX1_ENABLED */
10051         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10052 #ifdef LIBXML_SAX1_ENABLED
10053     else
10054 	name = xmlParseStartTag(ctxt);
10055 #endif /* LIBXML_SAX1_ENABLED */
10056     if (ctxt->instate == XML_PARSER_EOF)
10057 	return(-1);
10058     if (name == NULL) {
10059 	spacePop(ctxt);
10060         return(-1);
10061     }
10062     nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10063     ret = ctxt->node;
10064 
10065 #ifdef LIBXML_VALID_ENABLED
10066     /*
10067      * [ VC: Root Element Type ]
10068      * The Name in the document type declaration must match the element
10069      * type of the root element.
10070      */
10071     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10072         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10073         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10074 #endif /* LIBXML_VALID_ENABLED */
10075 
10076     /*
10077      * Check for an Empty Element.
10078      */
10079     if ((RAW == '/') && (NXT(1) == '>')) {
10080         SKIP(2);
10081 	if (ctxt->sax2) {
10082 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10083 		(!ctxt->disableSAX))
10084 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10085 #ifdef LIBXML_SAX1_ENABLED
10086 	} else {
10087 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10088 		(!ctxt->disableSAX))
10089 		ctxt->sax->endElement(ctxt->userData, name);
10090 #endif /* LIBXML_SAX1_ENABLED */
10091 	}
10092 	namePop(ctxt);
10093 	spacePop(ctxt);
10094 	if (nsNr != ctxt->nsNr)
10095 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10096 	if ( ret != NULL && ctxt->record_info ) {
10097 	   node_info.end_pos = ctxt->input->consumed +
10098 			      (CUR_PTR - ctxt->input->base);
10099 	   node_info.end_line = ctxt->input->line;
10100 	   node_info.node = ret;
10101 	   xmlParserAddNodeInfo(ctxt, &node_info);
10102 	}
10103 	return(1);
10104     }
10105     if (RAW == '>') {
10106         NEXT1;
10107     } else {
10108         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10109 		     "Couldn't find end of Start Tag %s line %d\n",
10110 		                name, line, NULL);
10111 
10112 	/*
10113 	 * end of parsing of this node.
10114 	 */
10115 	nodePop(ctxt);
10116 	namePop(ctxt);
10117 	spacePop(ctxt);
10118 	if (nsNr != ctxt->nsNr)
10119 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10120 
10121 	/*
10122 	 * Capture end position and add node
10123 	 */
10124 	if ( ret != NULL && ctxt->record_info ) {
10125 	   node_info.end_pos = ctxt->input->consumed +
10126 			      (CUR_PTR - ctxt->input->base);
10127 	   node_info.end_line = ctxt->input->line;
10128 	   node_info.node = ret;
10129 	   xmlParserAddNodeInfo(ctxt, &node_info);
10130 	}
10131 	return(-1);
10132     }
10133 
10134     return(0);
10135 }
10136 
10137 /**
10138  * xmlParseElementEnd:
10139  * @ctxt:  an XML parser context
10140  *
10141  * Parse the end of an XML element.
10142  */
10143 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10144 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10145     xmlParserNodeInfo node_info;
10146     xmlNodePtr ret = ctxt->node;
10147 
10148     if (ctxt->nameNr <= 0)
10149         return;
10150 
10151     /*
10152      * parse the end of tag: '</' should be here.
10153      */
10154     if (ctxt->sax2) {
10155 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10156 	namePop(ctxt);
10157     }
10158 #ifdef LIBXML_SAX1_ENABLED
10159     else
10160 	xmlParseEndTag1(ctxt, 0);
10161 #endif /* LIBXML_SAX1_ENABLED */
10162 
10163     /*
10164      * Capture end position and add node
10165      */
10166     if ( ret != NULL && ctxt->record_info ) {
10167        node_info.end_pos = ctxt->input->consumed +
10168                           (CUR_PTR - ctxt->input->base);
10169        node_info.end_line = ctxt->input->line;
10170        node_info.node = ret;
10171        xmlParserAddNodeInfo(ctxt, &node_info);
10172     }
10173 }
10174 
10175 /**
10176  * xmlParseVersionNum:
10177  * @ctxt:  an XML parser context
10178  *
10179  * parse the XML version value.
10180  *
10181  * [26] VersionNum ::= '1.' [0-9]+
10182  *
10183  * In practice allow [0-9].[0-9]+ at that level
10184  *
10185  * Returns the string giving the XML version number, or NULL
10186  */
10187 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10188 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10189     xmlChar *buf = NULL;
10190     int len = 0;
10191     int size = 10;
10192     xmlChar cur;
10193 
10194     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10195     if (buf == NULL) {
10196 	xmlErrMemory(ctxt, NULL);
10197 	return(NULL);
10198     }
10199     cur = CUR;
10200     if (!((cur >= '0') && (cur <= '9'))) {
10201 	xmlFree(buf);
10202 	return(NULL);
10203     }
10204     buf[len++] = cur;
10205     NEXT;
10206     cur=CUR;
10207     if (cur != '.') {
10208 	xmlFree(buf);
10209 	return(NULL);
10210     }
10211     buf[len++] = cur;
10212     NEXT;
10213     cur=CUR;
10214     while ((cur >= '0') && (cur <= '9')) {
10215 	if (len + 1 >= size) {
10216 	    xmlChar *tmp;
10217 
10218 	    size *= 2;
10219 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10220 	    if (tmp == NULL) {
10221 	        xmlFree(buf);
10222 		xmlErrMemory(ctxt, NULL);
10223 		return(NULL);
10224 	    }
10225 	    buf = tmp;
10226 	}
10227 	buf[len++] = cur;
10228 	NEXT;
10229 	cur=CUR;
10230     }
10231     buf[len] = 0;
10232     return(buf);
10233 }
10234 
10235 /**
10236  * xmlParseVersionInfo:
10237  * @ctxt:  an XML parser context
10238  *
10239  * parse the XML version.
10240  *
10241  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10242  *
10243  * [25] Eq ::= S? '=' S?
10244  *
10245  * Returns the version string, e.g. "1.0"
10246  */
10247 
10248 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10249 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10250     xmlChar *version = NULL;
10251 
10252     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10253 	SKIP(7);
10254 	SKIP_BLANKS;
10255 	if (RAW != '=') {
10256 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257 	    return(NULL);
10258         }
10259 	NEXT;
10260 	SKIP_BLANKS;
10261 	if (RAW == '"') {
10262 	    NEXT;
10263 	    version = xmlParseVersionNum(ctxt);
10264 	    if (RAW != '"') {
10265 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10266 	    } else
10267 	        NEXT;
10268 	} else if (RAW == '\''){
10269 	    NEXT;
10270 	    version = xmlParseVersionNum(ctxt);
10271 	    if (RAW != '\'') {
10272 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273 	    } else
10274 	        NEXT;
10275 	} else {
10276 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10277 	}
10278     }
10279     return(version);
10280 }
10281 
10282 /**
10283  * xmlParseEncName:
10284  * @ctxt:  an XML parser context
10285  *
10286  * parse the XML encoding name
10287  *
10288  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10289  *
10290  * Returns the encoding name value or NULL
10291  */
10292 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10293 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10294     xmlChar *buf = NULL;
10295     int len = 0;
10296     int size = 10;
10297     xmlChar cur;
10298 
10299     cur = CUR;
10300     if (((cur >= 'a') && (cur <= 'z')) ||
10301         ((cur >= 'A') && (cur <= 'Z'))) {
10302 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10303 	if (buf == NULL) {
10304 	    xmlErrMemory(ctxt, NULL);
10305 	    return(NULL);
10306 	}
10307 
10308 	buf[len++] = cur;
10309 	NEXT;
10310 	cur = CUR;
10311 	while (((cur >= 'a') && (cur <= 'z')) ||
10312 	       ((cur >= 'A') && (cur <= 'Z')) ||
10313 	       ((cur >= '0') && (cur <= '9')) ||
10314 	       (cur == '.') || (cur == '_') ||
10315 	       (cur == '-')) {
10316 	    if (len + 1 >= size) {
10317 	        xmlChar *tmp;
10318 
10319 		size *= 2;
10320 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10321 		if (tmp == NULL) {
10322 		    xmlErrMemory(ctxt, NULL);
10323 		    xmlFree(buf);
10324 		    return(NULL);
10325 		}
10326 		buf = tmp;
10327 	    }
10328 	    buf[len++] = cur;
10329 	    NEXT;
10330 	    cur = CUR;
10331 	    if (cur == 0) {
10332 	        SHRINK;
10333 		GROW;
10334 		cur = CUR;
10335 	    }
10336         }
10337 	buf[len] = 0;
10338     } else {
10339 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10340     }
10341     return(buf);
10342 }
10343 
10344 /**
10345  * xmlParseEncodingDecl:
10346  * @ctxt:  an XML parser context
10347  *
10348  * parse the XML encoding declaration
10349  *
10350  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10351  *
10352  * this setups the conversion filters.
10353  *
10354  * Returns the encoding value or NULL
10355  */
10356 
10357 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10358 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10359     xmlChar *encoding = NULL;
10360 
10361     SKIP_BLANKS;
10362     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10363 	SKIP(8);
10364 	SKIP_BLANKS;
10365 	if (RAW != '=') {
10366 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10367 	    return(NULL);
10368         }
10369 	NEXT;
10370 	SKIP_BLANKS;
10371 	if (RAW == '"') {
10372 	    NEXT;
10373 	    encoding = xmlParseEncName(ctxt);
10374 	    if (RAW != '"') {
10375 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376 		xmlFree((xmlChar *) encoding);
10377 		return(NULL);
10378 	    } else
10379 	        NEXT;
10380 	} else if (RAW == '\''){
10381 	    NEXT;
10382 	    encoding = xmlParseEncName(ctxt);
10383 	    if (RAW != '\'') {
10384 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10385 		xmlFree((xmlChar *) encoding);
10386 		return(NULL);
10387 	    } else
10388 	        NEXT;
10389 	} else {
10390 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10391 	}
10392 
10393         /*
10394          * Non standard parsing, allowing the user to ignore encoding
10395          */
10396         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10397 	    xmlFree((xmlChar *) encoding);
10398             return(NULL);
10399 	}
10400 
10401 	/*
10402 	 * UTF-16 encoding switch has already taken place at this stage,
10403 	 * more over the little-endian/big-endian selection is already done
10404 	 */
10405         if ((encoding != NULL) &&
10406 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10407 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10408 	    /*
10409 	     * If no encoding was passed to the parser, that we are
10410 	     * using UTF-16 and no decoder is present i.e. the
10411 	     * document is apparently UTF-8 compatible, then raise an
10412 	     * encoding mismatch fatal error
10413 	     */
10414 	    if ((ctxt->encoding == NULL) &&
10415 	        (ctxt->input->buf != NULL) &&
10416 	        (ctxt->input->buf->encoder == NULL)) {
10417 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10418 		  "Document labelled UTF-16 but has UTF-8 content\n");
10419 	    }
10420 	    if (ctxt->encoding != NULL)
10421 		xmlFree((xmlChar *) ctxt->encoding);
10422 	    ctxt->encoding = encoding;
10423 	}
10424 	/*
10425 	 * UTF-8 encoding is handled natively
10426 	 */
10427         else if ((encoding != NULL) &&
10428 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10429 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10430 	    if (ctxt->encoding != NULL)
10431 		xmlFree((xmlChar *) ctxt->encoding);
10432 	    ctxt->encoding = encoding;
10433 	}
10434 	else if (encoding != NULL) {
10435 	    xmlCharEncodingHandlerPtr handler;
10436 
10437 	    if (ctxt->input->encoding != NULL)
10438 		xmlFree((xmlChar *) ctxt->input->encoding);
10439 	    ctxt->input->encoding = encoding;
10440 
10441             handler = xmlFindCharEncodingHandler((const char *) encoding);
10442 	    if (handler != NULL) {
10443 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10444 		    /* failed to convert */
10445 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10446 		    return(NULL);
10447 		}
10448 	    } else {
10449 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10450 			"Unsupported encoding %s\n", encoding);
10451 		return(NULL);
10452 	    }
10453 	}
10454     }
10455     return(encoding);
10456 }
10457 
10458 /**
10459  * xmlParseSDDecl:
10460  * @ctxt:  an XML parser context
10461  *
10462  * parse the XML standalone declaration
10463  *
10464  * [32] SDDecl ::= S 'standalone' Eq
10465  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10466  *
10467  * [ VC: Standalone Document Declaration ]
10468  * TODO The standalone document declaration must have the value "no"
10469  * if any external markup declarations contain declarations of:
10470  *  - attributes with default values, if elements to which these
10471  *    attributes apply appear in the document without specifications
10472  *    of values for these attributes, or
10473  *  - entities (other than amp, lt, gt, apos, quot), if references
10474  *    to those entities appear in the document, or
10475  *  - attributes with values subject to normalization, where the
10476  *    attribute appears in the document with a value which will change
10477  *    as a result of normalization, or
10478  *  - element types with element content, if white space occurs directly
10479  *    within any instance of those types.
10480  *
10481  * Returns:
10482  *   1 if standalone="yes"
10483  *   0 if standalone="no"
10484  *  -2 if standalone attribute is missing or invalid
10485  *	  (A standalone value of -2 means that the XML declaration was found,
10486  *	   but no value was specified for the standalone attribute).
10487  */
10488 
10489 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10490 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10491     int standalone = -2;
10492 
10493     SKIP_BLANKS;
10494     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10495 	SKIP(10);
10496         SKIP_BLANKS;
10497 	if (RAW != '=') {
10498 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10499 	    return(standalone);
10500         }
10501 	NEXT;
10502 	SKIP_BLANKS;
10503         if (RAW == '\''){
10504 	    NEXT;
10505 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10506 	        standalone = 0;
10507                 SKIP(2);
10508 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10509 	               (NXT(2) == 's')) {
10510 	        standalone = 1;
10511 		SKIP(3);
10512             } else {
10513 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10514 	    }
10515 	    if (RAW != '\'') {
10516 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10517 	    } else
10518 	        NEXT;
10519 	} else if (RAW == '"'){
10520 	    NEXT;
10521 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10522 	        standalone = 0;
10523 		SKIP(2);
10524 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525 	               (NXT(2) == 's')) {
10526 	        standalone = 1;
10527                 SKIP(3);
10528             } else {
10529 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530 	    }
10531 	    if (RAW != '"') {
10532 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533 	    } else
10534 	        NEXT;
10535 	} else {
10536 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10537         }
10538     }
10539     return(standalone);
10540 }
10541 
10542 /**
10543  * xmlParseXMLDecl:
10544  * @ctxt:  an XML parser context
10545  *
10546  * parse an XML declaration header
10547  *
10548  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10549  */
10550 
10551 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10552 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10553     xmlChar *version;
10554 
10555     /*
10556      * This value for standalone indicates that the document has an
10557      * XML declaration but it does not have a standalone attribute.
10558      * It will be overwritten later if a standalone attribute is found.
10559      */
10560     ctxt->input->standalone = -2;
10561 
10562     /*
10563      * We know that '<?xml' is here.
10564      */
10565     SKIP(5);
10566 
10567     if (!IS_BLANK_CH(RAW)) {
10568 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10569 	               "Blank needed after '<?xml'\n");
10570     }
10571     SKIP_BLANKS;
10572 
10573     /*
10574      * We must have the VersionInfo here.
10575      */
10576     version = xmlParseVersionInfo(ctxt);
10577     if (version == NULL) {
10578 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10579     } else {
10580 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10581 	    /*
10582 	     * Changed here for XML-1.0 5th edition
10583 	     */
10584 	    if (ctxt->options & XML_PARSE_OLD10) {
10585 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586 			          "Unsupported version '%s'\n",
10587 			          version);
10588 	    } else {
10589 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10590 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10591 		                  "Unsupported version '%s'\n",
10592 				  version, NULL);
10593 		} else {
10594 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10595 				      "Unsupported version '%s'\n",
10596 				      version);
10597 		}
10598 	    }
10599 	}
10600 	if (ctxt->version != NULL)
10601 	    xmlFree((void *) ctxt->version);
10602 	ctxt->version = version;
10603     }
10604 
10605     /*
10606      * We may have the encoding declaration
10607      */
10608     if (!IS_BLANK_CH(RAW)) {
10609         if ((RAW == '?') && (NXT(1) == '>')) {
10610 	    SKIP(2);
10611 	    return;
10612 	}
10613 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10614     }
10615     xmlParseEncodingDecl(ctxt);
10616     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10617          (ctxt->instate == XML_PARSER_EOF)) {
10618 	/*
10619 	 * The XML REC instructs us to stop parsing right here
10620 	 */
10621         return;
10622     }
10623 
10624     /*
10625      * We may have the standalone status.
10626      */
10627     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10628         if ((RAW == '?') && (NXT(1) == '>')) {
10629 	    SKIP(2);
10630 	    return;
10631 	}
10632 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10633     }
10634 
10635     /*
10636      * We can grow the input buffer freely at that point
10637      */
10638     GROW;
10639 
10640     SKIP_BLANKS;
10641     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10642 
10643     SKIP_BLANKS;
10644     if ((RAW == '?') && (NXT(1) == '>')) {
10645         SKIP(2);
10646     } else if (RAW == '>') {
10647         /* Deprecated old WD ... */
10648 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10649 	NEXT;
10650     } else {
10651 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652 	MOVETO_ENDTAG(CUR_PTR);
10653 	NEXT;
10654     }
10655 }
10656 
10657 /**
10658  * xmlParseMisc:
10659  * @ctxt:  an XML parser context
10660  *
10661  * parse an XML Misc* optional field.
10662  *
10663  * [27] Misc ::= Comment | PI |  S
10664  */
10665 
10666 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10667 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10668     while (ctxt->instate != XML_PARSER_EOF) {
10669         SKIP_BLANKS;
10670         GROW;
10671         if ((RAW == '<') && (NXT(1) == '?')) {
10672 	    xmlParsePI(ctxt);
10673         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10674 	    xmlParseComment(ctxt);
10675         } else {
10676             break;
10677         }
10678     }
10679 }
10680 
10681 /**
10682  * xmlParseDocument:
10683  * @ctxt:  an XML parser context
10684  *
10685  * parse an XML document (and build a tree if using the standard SAX
10686  * interface).
10687  *
10688  * [1] document ::= prolog element Misc*
10689  *
10690  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10691  *
10692  * Returns 0, -1 in case of error. the parser context is augmented
10693  *                as a result of the parsing.
10694  */
10695 
10696 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10697 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10698     xmlChar start[4];
10699     xmlCharEncoding enc;
10700 
10701     xmlInitParser();
10702 
10703     if ((ctxt == NULL) || (ctxt->input == NULL))
10704         return(-1);
10705 
10706     GROW;
10707 
10708     /*
10709      * SAX: detecting the level.
10710      */
10711     xmlDetectSAX2(ctxt);
10712 
10713     /*
10714      * SAX: beginning of the document processing.
10715      */
10716     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10717         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10718     if (ctxt->instate == XML_PARSER_EOF)
10719 	return(-1);
10720 
10721     if ((ctxt->encoding == NULL) &&
10722         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10723 	/*
10724 	 * Get the 4 first bytes and decode the charset
10725 	 * if enc != XML_CHAR_ENCODING_NONE
10726 	 * plug some encoding conversion routines.
10727 	 */
10728 	start[0] = RAW;
10729 	start[1] = NXT(1);
10730 	start[2] = NXT(2);
10731 	start[3] = NXT(3);
10732 	enc = xmlDetectCharEncoding(&start[0], 4);
10733 	if (enc != XML_CHAR_ENCODING_NONE) {
10734 	    xmlSwitchEncoding(ctxt, enc);
10735 	}
10736     }
10737 
10738 
10739     if (CUR == 0) {
10740 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10741 	return(-1);
10742     }
10743 
10744     /*
10745      * Check for the XMLDecl in the Prolog.
10746      * do not GROW here to avoid the detected encoder to decode more
10747      * than just the first line, unless the amount of data is really
10748      * too small to hold "<?xml version="1.0" encoding="foo"
10749      */
10750     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10751        GROW;
10752     }
10753     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10754 
10755 	/*
10756 	 * Note that we will switch encoding on the fly.
10757 	 */
10758 	xmlParseXMLDecl(ctxt);
10759 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10760 	    (ctxt->instate == XML_PARSER_EOF)) {
10761 	    /*
10762 	     * The XML REC instructs us to stop parsing right here
10763 	     */
10764 	    return(-1);
10765 	}
10766 	ctxt->standalone = ctxt->input->standalone;
10767 	SKIP_BLANKS;
10768     } else {
10769 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10770     }
10771     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10772         ctxt->sax->startDocument(ctxt->userData);
10773     if (ctxt->instate == XML_PARSER_EOF)
10774 	return(-1);
10775     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10776         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10777 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10778     }
10779 
10780     /*
10781      * The Misc part of the Prolog
10782      */
10783     xmlParseMisc(ctxt);
10784 
10785     /*
10786      * Then possibly doc type declaration(s) and more Misc
10787      * (doctypedecl Misc*)?
10788      */
10789     GROW;
10790     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10791 
10792 	ctxt->inSubset = 1;
10793 	xmlParseDocTypeDecl(ctxt);
10794 	if (RAW == '[') {
10795 	    ctxt->instate = XML_PARSER_DTD;
10796 	    xmlParseInternalSubset(ctxt);
10797 	    if (ctxt->instate == XML_PARSER_EOF)
10798 		return(-1);
10799 	}
10800 
10801 	/*
10802 	 * Create and update the external subset.
10803 	 */
10804 	ctxt->inSubset = 2;
10805 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10806 	    (!ctxt->disableSAX))
10807 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10808 	                              ctxt->extSubSystem, ctxt->extSubURI);
10809 	if (ctxt->instate == XML_PARSER_EOF)
10810 	    return(-1);
10811 	ctxt->inSubset = 0;
10812 
10813         xmlCleanSpecialAttr(ctxt);
10814 
10815 	ctxt->instate = XML_PARSER_PROLOG;
10816 	xmlParseMisc(ctxt);
10817     }
10818 
10819     /*
10820      * Time to start parsing the tree itself
10821      */
10822     GROW;
10823     if (RAW != '<') {
10824 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10825 		       "Start tag expected, '<' not found\n");
10826     } else {
10827 	ctxt->instate = XML_PARSER_CONTENT;
10828 	xmlParseElement(ctxt);
10829 	ctxt->instate = XML_PARSER_EPILOG;
10830 
10831 
10832 	/*
10833 	 * The Misc part at the end
10834 	 */
10835 	xmlParseMisc(ctxt);
10836 
10837 	if (RAW != 0) {
10838 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10839 	}
10840 	ctxt->instate = XML_PARSER_EOF;
10841     }
10842 
10843     /*
10844      * SAX: end of the document processing.
10845      */
10846     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10847         ctxt->sax->endDocument(ctxt->userData);
10848 
10849     /*
10850      * Remove locally kept entity definitions if the tree was not built
10851      */
10852     if ((ctxt->myDoc != NULL) &&
10853 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10854 	xmlFreeDoc(ctxt->myDoc);
10855 	ctxt->myDoc = NULL;
10856     }
10857 
10858     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10859         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10860 	if (ctxt->valid)
10861 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10862 	if (ctxt->nsWellFormed)
10863 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10864 	if (ctxt->options & XML_PARSE_OLD10)
10865 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10866     }
10867     if (! ctxt->wellFormed) {
10868 	ctxt->valid = 0;
10869 	return(-1);
10870     }
10871     return(0);
10872 }
10873 
10874 /**
10875  * xmlParseExtParsedEnt:
10876  * @ctxt:  an XML parser context
10877  *
10878  * parse a general parsed entity
10879  * An external general parsed entity is well-formed if it matches the
10880  * production labeled extParsedEnt.
10881  *
10882  * [78] extParsedEnt ::= TextDecl? content
10883  *
10884  * Returns 0, -1 in case of error. the parser context is augmented
10885  *                as a result of the parsing.
10886  */
10887 
10888 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10889 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10890     xmlChar start[4];
10891     xmlCharEncoding enc;
10892 
10893     if ((ctxt == NULL) || (ctxt->input == NULL))
10894         return(-1);
10895 
10896     xmlDetectSAX2(ctxt);
10897 
10898     GROW;
10899 
10900     /*
10901      * SAX: beginning of the document processing.
10902      */
10903     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10904         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10905 
10906     /*
10907      * Get the 4 first bytes and decode the charset
10908      * if enc != XML_CHAR_ENCODING_NONE
10909      * plug some encoding conversion routines.
10910      */
10911     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10912 	start[0] = RAW;
10913 	start[1] = NXT(1);
10914 	start[2] = NXT(2);
10915 	start[3] = NXT(3);
10916 	enc = xmlDetectCharEncoding(start, 4);
10917 	if (enc != XML_CHAR_ENCODING_NONE) {
10918 	    xmlSwitchEncoding(ctxt, enc);
10919 	}
10920     }
10921 
10922 
10923     if (CUR == 0) {
10924 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10925     }
10926 
10927     /*
10928      * Check for the XMLDecl in the Prolog.
10929      */
10930     GROW;
10931     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10932 
10933 	/*
10934 	 * Note that we will switch encoding on the fly.
10935 	 */
10936 	xmlParseXMLDecl(ctxt);
10937 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10938 	    /*
10939 	     * The XML REC instructs us to stop parsing right here
10940 	     */
10941 	    return(-1);
10942 	}
10943 	SKIP_BLANKS;
10944     } else {
10945 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10946     }
10947     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10948         ctxt->sax->startDocument(ctxt->userData);
10949     if (ctxt->instate == XML_PARSER_EOF)
10950 	return(-1);
10951 
10952     /*
10953      * Doing validity checking on chunk doesn't make sense
10954      */
10955     ctxt->instate = XML_PARSER_CONTENT;
10956     ctxt->validate = 0;
10957     ctxt->loadsubset = 0;
10958     ctxt->depth = 0;
10959 
10960     xmlParseContent(ctxt);
10961     if (ctxt->instate == XML_PARSER_EOF)
10962 	return(-1);
10963 
10964     if ((RAW == '<') && (NXT(1) == '/')) {
10965 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10966     } else if (RAW != 0) {
10967 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10968     }
10969 
10970     /*
10971      * SAX: end of the document processing.
10972      */
10973     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974         ctxt->sax->endDocument(ctxt->userData);
10975 
10976     if (! ctxt->wellFormed) return(-1);
10977     return(0);
10978 }
10979 
10980 #ifdef LIBXML_PUSH_ENABLED
10981 /************************************************************************
10982  *									*
10983  *		Progressive parsing interfaces				*
10984  *									*
10985  ************************************************************************/
10986 
10987 /**
10988  * xmlParseLookupSequence:
10989  * @ctxt:  an XML parser context
10990  * @first:  the first char to lookup
10991  * @next:  the next char to lookup or zero
10992  * @third:  the next char to lookup or zero
10993  *
10994  * Try to find if a sequence (first, next, third) or  just (first next) or
10995  * (first) is available in the input stream.
10996  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997  * to avoid rescanning sequences of bytes, it DOES change the state of the
10998  * parser, do not use liberally.
10999  *
11000  * Returns the index to the current parsing point if the full sequence
11001  *      is available, -1 otherwise.
11002  */
11003 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11004 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11005                        xmlChar next, xmlChar third) {
11006     int base, len;
11007     xmlParserInputPtr in;
11008     const xmlChar *buf;
11009 
11010     in = ctxt->input;
11011     if (in == NULL) return(-1);
11012     base = in->cur - in->base;
11013     if (base < 0) return(-1);
11014     if (ctxt->checkIndex > base)
11015         base = ctxt->checkIndex;
11016     if (in->buf == NULL) {
11017 	buf = in->base;
11018 	len = in->length;
11019     } else {
11020 	buf = xmlBufContent(in->buf->buffer);
11021 	len = xmlBufUse(in->buf->buffer);
11022     }
11023     /* take into account the sequence length */
11024     if (third) len -= 2;
11025     else if (next) len --;
11026     for (;base < len;base++) {
11027         if (buf[base] == first) {
11028 	    if (third != 0) {
11029 		if ((buf[base + 1] != next) ||
11030 		    (buf[base + 2] != third)) continue;
11031 	    } else if (next != 0) {
11032 		if (buf[base + 1] != next) continue;
11033 	    }
11034 	    ctxt->checkIndex = 0;
11035 #ifdef DEBUG_PUSH
11036 	    if (next == 0)
11037 		xmlGenericError(xmlGenericErrorContext,
11038 			"PP: lookup '%c' found at %d\n",
11039 			first, base);
11040 	    else if (third == 0)
11041 		xmlGenericError(xmlGenericErrorContext,
11042 			"PP: lookup '%c%c' found at %d\n",
11043 			first, next, base);
11044 	    else
11045 		xmlGenericError(xmlGenericErrorContext,
11046 			"PP: lookup '%c%c%c' found at %d\n",
11047 			first, next, third, base);
11048 #endif
11049 	    return(base - (in->cur - in->base));
11050 	}
11051     }
11052     ctxt->checkIndex = base;
11053 #ifdef DEBUG_PUSH
11054     if (next == 0)
11055 	xmlGenericError(xmlGenericErrorContext,
11056 		"PP: lookup '%c' failed\n", first);
11057     else if (third == 0)
11058 	xmlGenericError(xmlGenericErrorContext,
11059 		"PP: lookup '%c%c' failed\n", first, next);
11060     else
11061 	xmlGenericError(xmlGenericErrorContext,
11062 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11063 #endif
11064     return(-1);
11065 }
11066 
11067 /**
11068  * xmlParseGetLasts:
11069  * @ctxt:  an XML parser context
11070  * @lastlt:  pointer to store the last '<' from the input
11071  * @lastgt:  pointer to store the last '>' from the input
11072  *
11073  * Lookup the last < and > in the current chunk
11074  */
11075 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11076 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11077                  const xmlChar **lastgt) {
11078     const xmlChar *tmp;
11079 
11080     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11081 	xmlGenericError(xmlGenericErrorContext,
11082 		    "Internal error: xmlParseGetLasts\n");
11083 	return;
11084     }
11085     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11086         tmp = ctxt->input->end;
11087 	tmp--;
11088 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11089 	if (tmp < ctxt->input->base) {
11090 	    *lastlt = NULL;
11091 	    *lastgt = NULL;
11092 	} else {
11093 	    *lastlt = tmp;
11094 	    tmp++;
11095 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11096 	        if (*tmp == '\'') {
11097 		    tmp++;
11098 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11099 		    if (tmp < ctxt->input->end) tmp++;
11100 		} else if (*tmp == '"') {
11101 		    tmp++;
11102 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11103 		    if (tmp < ctxt->input->end) tmp++;
11104 		} else
11105 		    tmp++;
11106 	    }
11107 	    if (tmp < ctxt->input->end)
11108 	        *lastgt = tmp;
11109 	    else {
11110 	        tmp = *lastlt;
11111 		tmp--;
11112 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11113 		if (tmp >= ctxt->input->base)
11114 		    *lastgt = tmp;
11115 		else
11116 		    *lastgt = NULL;
11117 	    }
11118 	}
11119     } else {
11120         *lastlt = NULL;
11121 	*lastgt = NULL;
11122     }
11123 }
11124 /**
11125  * xmlCheckCdataPush:
11126  * @cur: pointer to the block of characters
11127  * @len: length of the block in bytes
11128  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11129  *
11130  * Check that the block of characters is okay as SCdata content [20]
11131  *
11132  * Returns the number of bytes to pass if okay, a negative index where an
11133  *         UTF-8 error occurred otherwise
11134  */
11135 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11136 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11137     int ix;
11138     unsigned char c;
11139     int codepoint;
11140 
11141     if ((utf == NULL) || (len <= 0))
11142         return(0);
11143 
11144     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11145         c = utf[ix];
11146         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11147 	    if (c >= 0x20)
11148 		ix++;
11149 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11150 	        ix++;
11151 	    else
11152 	        return(-ix);
11153 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11154 	    if (ix + 2 > len) return(complete ? -ix : ix);
11155 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11156 	        return(-ix);
11157 	    codepoint = (utf[ix] & 0x1f) << 6;
11158 	    codepoint |= utf[ix+1] & 0x3f;
11159 	    if (!xmlIsCharQ(codepoint))
11160 	        return(-ix);
11161 	    ix += 2;
11162 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11163 	    if (ix + 3 > len) return(complete ? -ix : ix);
11164 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11165 	        ((utf[ix+2] & 0xc0) != 0x80))
11166 		    return(-ix);
11167 	    codepoint = (utf[ix] & 0xf) << 12;
11168 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11169 	    codepoint |= utf[ix+2] & 0x3f;
11170 	    if (!xmlIsCharQ(codepoint))
11171 	        return(-ix);
11172 	    ix += 3;
11173 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11174 	    if (ix + 4 > len) return(complete ? -ix : ix);
11175 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11176 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11177 		((utf[ix+3] & 0xc0) != 0x80))
11178 		    return(-ix);
11179 	    codepoint = (utf[ix] & 0x7) << 18;
11180 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11181 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11182 	    codepoint |= utf[ix+3] & 0x3f;
11183 	    if (!xmlIsCharQ(codepoint))
11184 	        return(-ix);
11185 	    ix += 4;
11186 	} else				/* unknown encoding */
11187 	    return(-ix);
11188       }
11189       return(ix);
11190 }
11191 
11192 /**
11193  * xmlParseTryOrFinish:
11194  * @ctxt:  an XML parser context
11195  * @terminate:  last chunk indicator
11196  *
11197  * Try to progress on parsing
11198  *
11199  * Returns zero if no parsing was possible
11200  */
11201 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11202 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11203     int ret = 0;
11204     int avail, tlen;
11205     xmlChar cur, next;
11206     const xmlChar *lastlt, *lastgt;
11207 
11208     if (ctxt->input == NULL)
11209         return(0);
11210 
11211 #ifdef DEBUG_PUSH
11212     switch (ctxt->instate) {
11213 	case XML_PARSER_EOF:
11214 	    xmlGenericError(xmlGenericErrorContext,
11215 		    "PP: try EOF\n"); break;
11216 	case XML_PARSER_START:
11217 	    xmlGenericError(xmlGenericErrorContext,
11218 		    "PP: try START\n"); break;
11219 	case XML_PARSER_MISC:
11220 	    xmlGenericError(xmlGenericErrorContext,
11221 		    "PP: try MISC\n");break;
11222 	case XML_PARSER_COMMENT:
11223 	    xmlGenericError(xmlGenericErrorContext,
11224 		    "PP: try COMMENT\n");break;
11225 	case XML_PARSER_PROLOG:
11226 	    xmlGenericError(xmlGenericErrorContext,
11227 		    "PP: try PROLOG\n");break;
11228 	case XML_PARSER_START_TAG:
11229 	    xmlGenericError(xmlGenericErrorContext,
11230 		    "PP: try START_TAG\n");break;
11231 	case XML_PARSER_CONTENT:
11232 	    xmlGenericError(xmlGenericErrorContext,
11233 		    "PP: try CONTENT\n");break;
11234 	case XML_PARSER_CDATA_SECTION:
11235 	    xmlGenericError(xmlGenericErrorContext,
11236 		    "PP: try CDATA_SECTION\n");break;
11237 	case XML_PARSER_END_TAG:
11238 	    xmlGenericError(xmlGenericErrorContext,
11239 		    "PP: try END_TAG\n");break;
11240 	case XML_PARSER_ENTITY_DECL:
11241 	    xmlGenericError(xmlGenericErrorContext,
11242 		    "PP: try ENTITY_DECL\n");break;
11243 	case XML_PARSER_ENTITY_VALUE:
11244 	    xmlGenericError(xmlGenericErrorContext,
11245 		    "PP: try ENTITY_VALUE\n");break;
11246 	case XML_PARSER_ATTRIBUTE_VALUE:
11247 	    xmlGenericError(xmlGenericErrorContext,
11248 		    "PP: try ATTRIBUTE_VALUE\n");break;
11249 	case XML_PARSER_DTD:
11250 	    xmlGenericError(xmlGenericErrorContext,
11251 		    "PP: try DTD\n");break;
11252 	case XML_PARSER_EPILOG:
11253 	    xmlGenericError(xmlGenericErrorContext,
11254 		    "PP: try EPILOG\n");break;
11255 	case XML_PARSER_PI:
11256 	    xmlGenericError(xmlGenericErrorContext,
11257 		    "PP: try PI\n");break;
11258         case XML_PARSER_IGNORE:
11259             xmlGenericError(xmlGenericErrorContext,
11260 		    "PP: try IGNORE\n");break;
11261     }
11262 #endif
11263 
11264     if ((ctxt->input != NULL) &&
11265         (ctxt->input->cur - ctxt->input->base > 4096)) {
11266 	xmlSHRINK(ctxt);
11267 	ctxt->checkIndex = 0;
11268     }
11269     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11270 
11271     while (ctxt->instate != XML_PARSER_EOF) {
11272 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11273 	    return(0);
11274 
11275 	if (ctxt->input == NULL) break;
11276 	if (ctxt->input->buf == NULL)
11277 	    avail = ctxt->input->length -
11278 	            (ctxt->input->cur - ctxt->input->base);
11279 	else {
11280 	    /*
11281 	     * If we are operating on converted input, try to flush
11282 	     * remaining chars to avoid them stalling in the non-converted
11283 	     * buffer. But do not do this in document start where
11284 	     * encoding="..." may not have been read and we work on a
11285 	     * guessed encoding.
11286 	     */
11287 	    if ((ctxt->instate != XML_PARSER_START) &&
11288 	        (ctxt->input->buf->raw != NULL) &&
11289 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11290                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11291                                                  ctxt->input);
11292 		size_t current = ctxt->input->cur - ctxt->input->base;
11293 
11294 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11295                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11296                                       base, current);
11297 	    }
11298 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11299 		    (ctxt->input->cur - ctxt->input->base);
11300 	}
11301         if (avail < 1)
11302 	    goto done;
11303         switch (ctxt->instate) {
11304             case XML_PARSER_EOF:
11305 	        /*
11306 		 * Document parsing is done !
11307 		 */
11308 	        goto done;
11309             case XML_PARSER_START:
11310 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11311 		    xmlChar start[4];
11312 		    xmlCharEncoding enc;
11313 
11314 		    /*
11315 		     * Very first chars read from the document flow.
11316 		     */
11317 		    if (avail < 4)
11318 			goto done;
11319 
11320 		    /*
11321 		     * Get the 4 first bytes and decode the charset
11322 		     * if enc != XML_CHAR_ENCODING_NONE
11323 		     * plug some encoding conversion routines,
11324 		     * else xmlSwitchEncoding will set to (default)
11325 		     * UTF8.
11326 		     */
11327 		    start[0] = RAW;
11328 		    start[1] = NXT(1);
11329 		    start[2] = NXT(2);
11330 		    start[3] = NXT(3);
11331 		    enc = xmlDetectCharEncoding(start, 4);
11332 		    xmlSwitchEncoding(ctxt, enc);
11333 		    break;
11334 		}
11335 
11336 		if (avail < 2)
11337 		    goto done;
11338 		cur = ctxt->input->cur[0];
11339 		next = ctxt->input->cur[1];
11340 		if (cur == 0) {
11341 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11342 			ctxt->sax->setDocumentLocator(ctxt->userData,
11343 						      &xmlDefaultSAXLocator);
11344 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11345 		    xmlHaltParser(ctxt);
11346 #ifdef DEBUG_PUSH
11347 		    xmlGenericError(xmlGenericErrorContext,
11348 			    "PP: entering EOF\n");
11349 #endif
11350 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351 			ctxt->sax->endDocument(ctxt->userData);
11352 		    goto done;
11353 		}
11354 	        if ((cur == '<') && (next == '?')) {
11355 		    /* PI or XML decl */
11356 		    if (avail < 5) return(ret);
11357 		    if ((!terminate) &&
11358 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11359 			return(ret);
11360 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11361 			ctxt->sax->setDocumentLocator(ctxt->userData,
11362 						      &xmlDefaultSAXLocator);
11363 		    if ((ctxt->input->cur[2] == 'x') &&
11364 			(ctxt->input->cur[3] == 'm') &&
11365 			(ctxt->input->cur[4] == 'l') &&
11366 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11367 			ret += 5;
11368 #ifdef DEBUG_PUSH
11369 			xmlGenericError(xmlGenericErrorContext,
11370 				"PP: Parsing XML Decl\n");
11371 #endif
11372 			xmlParseXMLDecl(ctxt);
11373 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11374 			    /*
11375 			     * The XML REC instructs us to stop parsing right
11376 			     * here
11377 			     */
11378 			    xmlHaltParser(ctxt);
11379 			    return(0);
11380 			}
11381 			ctxt->standalone = ctxt->input->standalone;
11382 			if ((ctxt->encoding == NULL) &&
11383 			    (ctxt->input->encoding != NULL))
11384 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11385 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11386 			    (!ctxt->disableSAX))
11387 			    ctxt->sax->startDocument(ctxt->userData);
11388 			ctxt->instate = XML_PARSER_MISC;
11389 #ifdef DEBUG_PUSH
11390 			xmlGenericError(xmlGenericErrorContext,
11391 				"PP: entering MISC\n");
11392 #endif
11393 		    } else {
11394 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11395 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11396 			    (!ctxt->disableSAX))
11397 			    ctxt->sax->startDocument(ctxt->userData);
11398 			ctxt->instate = XML_PARSER_MISC;
11399 #ifdef DEBUG_PUSH
11400 			xmlGenericError(xmlGenericErrorContext,
11401 				"PP: entering MISC\n");
11402 #endif
11403 		    }
11404 		} else {
11405 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11406 			ctxt->sax->setDocumentLocator(ctxt->userData,
11407 						      &xmlDefaultSAXLocator);
11408 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11409 		    if (ctxt->version == NULL) {
11410 		        xmlErrMemory(ctxt, NULL);
11411 			break;
11412 		    }
11413 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11414 		        (!ctxt->disableSAX))
11415 			ctxt->sax->startDocument(ctxt->userData);
11416 		    ctxt->instate = XML_PARSER_MISC;
11417 #ifdef DEBUG_PUSH
11418 		    xmlGenericError(xmlGenericErrorContext,
11419 			    "PP: entering MISC\n");
11420 #endif
11421 		}
11422 		break;
11423             case XML_PARSER_START_TAG: {
11424 	        const xmlChar *name;
11425 		const xmlChar *prefix = NULL;
11426 		const xmlChar *URI = NULL;
11427                 int line = ctxt->input->line;
11428 		int nsNr = ctxt->nsNr;
11429 
11430 		if ((avail < 2) && (ctxt->inputNr == 1))
11431 		    goto done;
11432 		cur = ctxt->input->cur[0];
11433 	        if (cur != '<') {
11434 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11435 		    xmlHaltParser(ctxt);
11436 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437 			ctxt->sax->endDocument(ctxt->userData);
11438 		    goto done;
11439 		}
11440 		if (!terminate) {
11441 		    if (ctxt->progressive) {
11442 		        /* > can be found unescaped in attribute values */
11443 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11444 			    goto done;
11445 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11446 			goto done;
11447 		    }
11448 		}
11449 		if (ctxt->spaceNr == 0)
11450 		    spacePush(ctxt, -1);
11451 		else if (*ctxt->space == -2)
11452 		    spacePush(ctxt, -1);
11453 		else
11454 		    spacePush(ctxt, *ctxt->space);
11455 #ifdef LIBXML_SAX1_ENABLED
11456 		if (ctxt->sax2)
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11459 #ifdef LIBXML_SAX1_ENABLED
11460 		else
11461 		    name = xmlParseStartTag(ctxt);
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 		if (ctxt->instate == XML_PARSER_EOF)
11464 		    goto done;
11465 		if (name == NULL) {
11466 		    spacePop(ctxt);
11467 		    xmlHaltParser(ctxt);
11468 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11469 			ctxt->sax->endDocument(ctxt->userData);
11470 		    goto done;
11471 		}
11472 #ifdef LIBXML_VALID_ENABLED
11473 		/*
11474 		 * [ VC: Root Element Type ]
11475 		 * The Name in the document type declaration must match
11476 		 * the element type of the root element.
11477 		 */
11478 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11479 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11480 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11481 #endif /* LIBXML_VALID_ENABLED */
11482 
11483 		/*
11484 		 * Check for an Empty Element.
11485 		 */
11486 		if ((RAW == '/') && (NXT(1) == '>')) {
11487 		    SKIP(2);
11488 
11489 		    if (ctxt->sax2) {
11490 			if ((ctxt->sax != NULL) &&
11491 			    (ctxt->sax->endElementNs != NULL) &&
11492 			    (!ctxt->disableSAX))
11493 			    ctxt->sax->endElementNs(ctxt->userData, name,
11494 			                            prefix, URI);
11495 			if (ctxt->nsNr - nsNr > 0)
11496 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11497 #ifdef LIBXML_SAX1_ENABLED
11498 		    } else {
11499 			if ((ctxt->sax != NULL) &&
11500 			    (ctxt->sax->endElement != NULL) &&
11501 			    (!ctxt->disableSAX))
11502 			    ctxt->sax->endElement(ctxt->userData, name);
11503 #endif /* LIBXML_SAX1_ENABLED */
11504 		    }
11505 		    if (ctxt->instate == XML_PARSER_EOF)
11506 			goto done;
11507 		    spacePop(ctxt);
11508 		    if (ctxt->nameNr == 0) {
11509 			ctxt->instate = XML_PARSER_EPILOG;
11510 		    } else {
11511 			ctxt->instate = XML_PARSER_CONTENT;
11512 		    }
11513                     ctxt->progressive = 1;
11514 		    break;
11515 		}
11516 		if (RAW == '>') {
11517 		    NEXT;
11518 		} else {
11519 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11520 					 "Couldn't find end of Start Tag %s\n",
11521 					 name);
11522 		    nodePop(ctxt);
11523 		    spacePop(ctxt);
11524 		}
11525                 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11526 
11527 		ctxt->instate = XML_PARSER_CONTENT;
11528                 ctxt->progressive = 1;
11529                 break;
11530 	    }
11531             case XML_PARSER_CONTENT: {
11532 		int id;
11533 		unsigned long cons;
11534 		if ((avail < 2) && (ctxt->inputNr == 1))
11535 		    goto done;
11536 		cur = ctxt->input->cur[0];
11537 		next = ctxt->input->cur[1];
11538 
11539 		id = ctxt->input->id;
11540 	        cons = CUR_CONSUMED;
11541 		if ((cur == '<') && (next == '/')) {
11542 		    ctxt->instate = XML_PARSER_END_TAG;
11543 		    break;
11544 	        } else if ((cur == '<') && (next == '?')) {
11545 		    if ((!terminate) &&
11546 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11547                         ctxt->progressive = XML_PARSER_PI;
11548 			goto done;
11549                     }
11550 		    xmlParsePI(ctxt);
11551 		    ctxt->instate = XML_PARSER_CONTENT;
11552                     ctxt->progressive = 1;
11553 		} else if ((cur == '<') && (next != '!')) {
11554 		    ctxt->instate = XML_PARSER_START_TAG;
11555 		    break;
11556 		} else if ((cur == '<') && (next == '!') &&
11557 		           (ctxt->input->cur[2] == '-') &&
11558 			   (ctxt->input->cur[3] == '-')) {
11559 		    int term;
11560 
11561 	            if (avail < 4)
11562 		        goto done;
11563 		    ctxt->input->cur += 4;
11564 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11565 		    ctxt->input->cur -= 4;
11566 		    if ((!terminate) && (term < 0)) {
11567                         ctxt->progressive = XML_PARSER_COMMENT;
11568 			goto done;
11569                     }
11570 		    xmlParseComment(ctxt);
11571 		    ctxt->instate = XML_PARSER_CONTENT;
11572                     ctxt->progressive = 1;
11573 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11574 		    (ctxt->input->cur[2] == '[') &&
11575 		    (ctxt->input->cur[3] == 'C') &&
11576 		    (ctxt->input->cur[4] == 'D') &&
11577 		    (ctxt->input->cur[5] == 'A') &&
11578 		    (ctxt->input->cur[6] == 'T') &&
11579 		    (ctxt->input->cur[7] == 'A') &&
11580 		    (ctxt->input->cur[8] == '[')) {
11581 		    SKIP(9);
11582 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11583 		    break;
11584 		} else if ((cur == '<') && (next == '!') &&
11585 		           (avail < 9)) {
11586 		    goto done;
11587 		} else if (cur == '&') {
11588 		    if ((!terminate) &&
11589 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11590 			goto done;
11591 		    xmlParseReference(ctxt);
11592 		} else {
11593 		    /* TODO Avoid the extra copy, handle directly !!! */
11594 		    /*
11595 		     * Goal of the following test is:
11596 		     *  - minimize calls to the SAX 'character' callback
11597 		     *    when they are mergeable
11598 		     *  - handle an problem for isBlank when we only parse
11599 		     *    a sequence of blank chars and the next one is
11600 		     *    not available to check against '<' presence.
11601 		     *  - tries to homogenize the differences in SAX
11602 		     *    callbacks between the push and pull versions
11603 		     *    of the parser.
11604 		     */
11605 		    if ((ctxt->inputNr == 1) &&
11606 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11607 			if (!terminate) {
11608 			    if (ctxt->progressive) {
11609 				if ((lastlt == NULL) ||
11610 				    (ctxt->input->cur > lastlt))
11611 				    goto done;
11612 			    } else if (xmlParseLookupSequence(ctxt,
11613 			                                      '<', 0, 0) < 0) {
11614 				goto done;
11615 			    }
11616 			}
11617                     }
11618 		    ctxt->checkIndex = 0;
11619 		    xmlParseCharData(ctxt, 0);
11620 		}
11621 		if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11622 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11623 		                "detected an error in element content\n");
11624 		    xmlHaltParser(ctxt);
11625 		    break;
11626 		}
11627 		break;
11628 	    }
11629             case XML_PARSER_END_TAG:
11630 		if (avail < 2)
11631 		    goto done;
11632 		if (!terminate) {
11633 		    if (ctxt->progressive) {
11634 		        /* > can be found unescaped in attribute values */
11635 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11636 			    goto done;
11637 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11638 			goto done;
11639 		    }
11640 		}
11641 		if (ctxt->sax2) {
11642 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11643 		    nameNsPop(ctxt);
11644 		}
11645 #ifdef LIBXML_SAX1_ENABLED
11646 		  else
11647 		    xmlParseEndTag1(ctxt, 0);
11648 #endif /* LIBXML_SAX1_ENABLED */
11649 		if (ctxt->instate == XML_PARSER_EOF) {
11650 		    /* Nothing */
11651 		} else if (ctxt->nameNr == 0) {
11652 		    ctxt->instate = XML_PARSER_EPILOG;
11653 		} else {
11654 		    ctxt->instate = XML_PARSER_CONTENT;
11655 		}
11656 		break;
11657             case XML_PARSER_CDATA_SECTION: {
11658 	        /*
11659 		 * The Push mode need to have the SAX callback for
11660 		 * cdataBlock merge back contiguous callbacks.
11661 		 */
11662 		int base;
11663 
11664 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11665 		if (base < 0) {
11666 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11667 		        int tmp;
11668 
11669 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11670 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11671 			if (tmp < 0) {
11672 			    tmp = -tmp;
11673 			    ctxt->input->cur += tmp;
11674 			    goto encoding_error;
11675 			}
11676 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11677 			    if (ctxt->sax->cdataBlock != NULL)
11678 				ctxt->sax->cdataBlock(ctxt->userData,
11679 				                      ctxt->input->cur, tmp);
11680 			    else if (ctxt->sax->characters != NULL)
11681 				ctxt->sax->characters(ctxt->userData,
11682 				                      ctxt->input->cur, tmp);
11683 			}
11684 			if (ctxt->instate == XML_PARSER_EOF)
11685 			    goto done;
11686 			SKIPL(tmp);
11687 			ctxt->checkIndex = 0;
11688 		    }
11689 		    goto done;
11690 		} else {
11691 		    int tmp;
11692 
11693 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11694 		    if ((tmp < 0) || (tmp != base)) {
11695 			tmp = -tmp;
11696 			ctxt->input->cur += tmp;
11697 			goto encoding_error;
11698 		    }
11699 		    if ((ctxt->sax != NULL) && (base == 0) &&
11700 		        (ctxt->sax->cdataBlock != NULL) &&
11701 		        (!ctxt->disableSAX)) {
11702 			/*
11703 			 * Special case to provide identical behaviour
11704 			 * between pull and push parsers on enpty CDATA
11705 			 * sections
11706 			 */
11707 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11708 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11709 			               "<![CDATA[", 9)))
11710 			     ctxt->sax->cdataBlock(ctxt->userData,
11711 			                           BAD_CAST "", 0);
11712 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11713 			(!ctxt->disableSAX)) {
11714 			if (ctxt->sax->cdataBlock != NULL)
11715 			    ctxt->sax->cdataBlock(ctxt->userData,
11716 						  ctxt->input->cur, base);
11717 			else if (ctxt->sax->characters != NULL)
11718 			    ctxt->sax->characters(ctxt->userData,
11719 						  ctxt->input->cur, base);
11720 		    }
11721 		    if (ctxt->instate == XML_PARSER_EOF)
11722 			goto done;
11723 		    SKIPL(base + 3);
11724 		    ctxt->checkIndex = 0;
11725 		    ctxt->instate = XML_PARSER_CONTENT;
11726 #ifdef DEBUG_PUSH
11727 		    xmlGenericError(xmlGenericErrorContext,
11728 			    "PP: entering CONTENT\n");
11729 #endif
11730 		}
11731 		break;
11732 	    }
11733             case XML_PARSER_MISC:
11734 		SKIP_BLANKS;
11735 		if (ctxt->input->buf == NULL)
11736 		    avail = ctxt->input->length -
11737 		            (ctxt->input->cur - ctxt->input->base);
11738 		else
11739 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11740 		            (ctxt->input->cur - ctxt->input->base);
11741 		if (avail < 2)
11742 		    goto done;
11743 		cur = ctxt->input->cur[0];
11744 		next = ctxt->input->cur[1];
11745 	        if ((cur == '<') && (next == '?')) {
11746 		    if ((!terminate) &&
11747 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748                         ctxt->progressive = XML_PARSER_PI;
11749 			goto done;
11750                     }
11751 #ifdef DEBUG_PUSH
11752 		    xmlGenericError(xmlGenericErrorContext,
11753 			    "PP: Parsing PI\n");
11754 #endif
11755 		    xmlParsePI(ctxt);
11756 		    if (ctxt->instate == XML_PARSER_EOF)
11757 			goto done;
11758 		    ctxt->instate = XML_PARSER_MISC;
11759                     ctxt->progressive = 1;
11760 		    ctxt->checkIndex = 0;
11761 		} else if ((cur == '<') && (next == '!') &&
11762 		    (ctxt->input->cur[2] == '-') &&
11763 		    (ctxt->input->cur[3] == '-')) {
11764 		    if ((!terminate) &&
11765 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11766                         ctxt->progressive = XML_PARSER_COMMENT;
11767 			goto done;
11768                     }
11769 #ifdef DEBUG_PUSH
11770 		    xmlGenericError(xmlGenericErrorContext,
11771 			    "PP: Parsing Comment\n");
11772 #endif
11773 		    xmlParseComment(ctxt);
11774 		    if (ctxt->instate == XML_PARSER_EOF)
11775 			goto done;
11776 		    ctxt->instate = XML_PARSER_MISC;
11777                     ctxt->progressive = 1;
11778 		    ctxt->checkIndex = 0;
11779 		} else if ((cur == '<') && (next == '!') &&
11780 		    (ctxt->input->cur[2] == 'D') &&
11781 		    (ctxt->input->cur[3] == 'O') &&
11782 		    (ctxt->input->cur[4] == 'C') &&
11783 		    (ctxt->input->cur[5] == 'T') &&
11784 		    (ctxt->input->cur[6] == 'Y') &&
11785 		    (ctxt->input->cur[7] == 'P') &&
11786 		    (ctxt->input->cur[8] == 'E')) {
11787 		    if ((!terminate) &&
11788 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11789                         ctxt->progressive = XML_PARSER_DTD;
11790 			goto done;
11791                     }
11792 #ifdef DEBUG_PUSH
11793 		    xmlGenericError(xmlGenericErrorContext,
11794 			    "PP: Parsing internal subset\n");
11795 #endif
11796 		    ctxt->inSubset = 1;
11797                     ctxt->progressive = 0;
11798 		    ctxt->checkIndex = 0;
11799 		    xmlParseDocTypeDecl(ctxt);
11800 		    if (ctxt->instate == XML_PARSER_EOF)
11801 			goto done;
11802 		    if (RAW == '[') {
11803 			ctxt->instate = XML_PARSER_DTD;
11804 #ifdef DEBUG_PUSH
11805 			xmlGenericError(xmlGenericErrorContext,
11806 				"PP: entering DTD\n");
11807 #endif
11808 		    } else {
11809 			/*
11810 			 * Create and update the external subset.
11811 			 */
11812 			ctxt->inSubset = 2;
11813 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11814 			    (ctxt->sax->externalSubset != NULL))
11815 			    ctxt->sax->externalSubset(ctxt->userData,
11816 				    ctxt->intSubName, ctxt->extSubSystem,
11817 				    ctxt->extSubURI);
11818 			ctxt->inSubset = 0;
11819 			xmlCleanSpecialAttr(ctxt);
11820 			ctxt->instate = XML_PARSER_PROLOG;
11821 #ifdef DEBUG_PUSH
11822 			xmlGenericError(xmlGenericErrorContext,
11823 				"PP: entering PROLOG\n");
11824 #endif
11825 		    }
11826 		} else if ((cur == '<') && (next == '!') &&
11827 		           (avail < 9)) {
11828 		    goto done;
11829 		} else {
11830 		    ctxt->instate = XML_PARSER_START_TAG;
11831 		    ctxt->progressive = XML_PARSER_START_TAG;
11832 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11833 #ifdef DEBUG_PUSH
11834 		    xmlGenericError(xmlGenericErrorContext,
11835 			    "PP: entering START_TAG\n");
11836 #endif
11837 		}
11838 		break;
11839             case XML_PARSER_PROLOG:
11840 		SKIP_BLANKS;
11841 		if (ctxt->input->buf == NULL)
11842 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11843 		else
11844 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11845                             (ctxt->input->cur - ctxt->input->base);
11846 		if (avail < 2)
11847 		    goto done;
11848 		cur = ctxt->input->cur[0];
11849 		next = ctxt->input->cur[1];
11850 	        if ((cur == '<') && (next == '?')) {
11851 		    if ((!terminate) &&
11852 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853                         ctxt->progressive = XML_PARSER_PI;
11854 			goto done;
11855                     }
11856 #ifdef DEBUG_PUSH
11857 		    xmlGenericError(xmlGenericErrorContext,
11858 			    "PP: Parsing PI\n");
11859 #endif
11860 		    xmlParsePI(ctxt);
11861 		    if (ctxt->instate == XML_PARSER_EOF)
11862 			goto done;
11863 		    ctxt->instate = XML_PARSER_PROLOG;
11864                     ctxt->progressive = 1;
11865 		} else if ((cur == '<') && (next == '!') &&
11866 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867 		    if ((!terminate) &&
11868 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869                         ctxt->progressive = XML_PARSER_COMMENT;
11870 			goto done;
11871                     }
11872 #ifdef DEBUG_PUSH
11873 		    xmlGenericError(xmlGenericErrorContext,
11874 			    "PP: Parsing Comment\n");
11875 #endif
11876 		    xmlParseComment(ctxt);
11877 		    if (ctxt->instate == XML_PARSER_EOF)
11878 			goto done;
11879 		    ctxt->instate = XML_PARSER_PROLOG;
11880                     ctxt->progressive = 1;
11881 		} else if ((cur == '<') && (next == '!') &&
11882 		           (avail < 4)) {
11883 		    goto done;
11884 		} else {
11885 		    ctxt->instate = XML_PARSER_START_TAG;
11886 		    if (ctxt->progressive == 0)
11887 			ctxt->progressive = XML_PARSER_START_TAG;
11888 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11889 #ifdef DEBUG_PUSH
11890 		    xmlGenericError(xmlGenericErrorContext,
11891 			    "PP: entering START_TAG\n");
11892 #endif
11893 		}
11894 		break;
11895             case XML_PARSER_EPILOG:
11896 		SKIP_BLANKS;
11897 		if (ctxt->input->buf == NULL)
11898 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11899 		else
11900 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11901                             (ctxt->input->cur - ctxt->input->base);
11902 		if (avail < 2)
11903 		    goto done;
11904 		cur = ctxt->input->cur[0];
11905 		next = ctxt->input->cur[1];
11906 	        if ((cur == '<') && (next == '?')) {
11907 		    if ((!terminate) &&
11908 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909                         ctxt->progressive = XML_PARSER_PI;
11910 			goto done;
11911                     }
11912 #ifdef DEBUG_PUSH
11913 		    xmlGenericError(xmlGenericErrorContext,
11914 			    "PP: Parsing PI\n");
11915 #endif
11916 		    xmlParsePI(ctxt);
11917 		    if (ctxt->instate == XML_PARSER_EOF)
11918 			goto done;
11919 		    ctxt->instate = XML_PARSER_EPILOG;
11920                     ctxt->progressive = 1;
11921 		} else if ((cur == '<') && (next == '!') &&
11922 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11923 		    if ((!terminate) &&
11924 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11925                         ctxt->progressive = XML_PARSER_COMMENT;
11926 			goto done;
11927                     }
11928 #ifdef DEBUG_PUSH
11929 		    xmlGenericError(xmlGenericErrorContext,
11930 			    "PP: Parsing Comment\n");
11931 #endif
11932 		    xmlParseComment(ctxt);
11933 		    if (ctxt->instate == XML_PARSER_EOF)
11934 			goto done;
11935 		    ctxt->instate = XML_PARSER_EPILOG;
11936                     ctxt->progressive = 1;
11937 		} else if ((cur == '<') && (next == '!') &&
11938 		           (avail < 4)) {
11939 		    goto done;
11940 		} else {
11941 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11942 		    xmlHaltParser(ctxt);
11943 #ifdef DEBUG_PUSH
11944 		    xmlGenericError(xmlGenericErrorContext,
11945 			    "PP: entering EOF\n");
11946 #endif
11947 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11948 			ctxt->sax->endDocument(ctxt->userData);
11949 		    goto done;
11950 		}
11951 		break;
11952             case XML_PARSER_DTD: {
11953 	        /*
11954 		 * Sorry but progressive parsing of the internal subset
11955 		 * is not expected to be supported. We first check that
11956 		 * the full content of the internal subset is available and
11957 		 * the parsing is launched only at that point.
11958 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11959 		 * section and not in a ']]>' sequence which are conditional
11960 		 * sections (whoever argued to keep that crap in XML deserve
11961 		 * a place in hell !).
11962 		 */
11963 		int base, i;
11964 		xmlChar *buf;
11965 	        xmlChar quote = 0;
11966                 size_t use;
11967 
11968 		base = ctxt->input->cur - ctxt->input->base;
11969 		if (base < 0) return(0);
11970 		if (ctxt->checkIndex > base)
11971 		    base = ctxt->checkIndex;
11972 		buf = xmlBufContent(ctxt->input->buf->buffer);
11973                 use = xmlBufUse(ctxt->input->buf->buffer);
11974 		for (;(unsigned int) base < use; base++) {
11975 		    if (quote != 0) {
11976 		        if (buf[base] == quote)
11977 			    quote = 0;
11978 			continue;
11979 		    }
11980 		    if ((quote == 0) && (buf[base] == '<')) {
11981 		        int found  = 0;
11982 			/* special handling of comments */
11983 		        if (((unsigned int) base + 4 < use) &&
11984 			    (buf[base + 1] == '!') &&
11985 			    (buf[base + 2] == '-') &&
11986 			    (buf[base + 3] == '-')) {
11987 			    for (;(unsigned int) base + 3 < use; base++) {
11988 				if ((buf[base] == '-') &&
11989 				    (buf[base + 1] == '-') &&
11990 				    (buf[base + 2] == '>')) {
11991 				    found = 1;
11992 				    base += 2;
11993 				    break;
11994 				}
11995 		            }
11996 			    if (!found) {
11997 #if 0
11998 			        fprintf(stderr, "unfinished comment\n");
11999 #endif
12000 			        break; /* for */
12001 		            }
12002 		            continue;
12003 			}
12004 		    }
12005 		    if (buf[base] == '"') {
12006 		        quote = '"';
12007 			continue;
12008 		    }
12009 		    if (buf[base] == '\'') {
12010 		        quote = '\'';
12011 			continue;
12012 		    }
12013 		    if (buf[base] == ']') {
12014 #if 0
12015 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12016 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12017 #endif
12018 		        if ((unsigned int) base +1 >= use)
12019 			    break;
12020 			if (buf[base + 1] == ']') {
12021 			    /* conditional crap, skip both ']' ! */
12022 			    base++;
12023 			    continue;
12024 			}
12025 		        for (i = 1; (unsigned int) base + i < use; i++) {
12026 			    if (buf[base + i] == '>') {
12027 #if 0
12028 			        fprintf(stderr, "found\n");
12029 #endif
12030 			        goto found_end_int_subset;
12031 			    }
12032 			    if (!IS_BLANK_CH(buf[base + i])) {
12033 #if 0
12034 			        fprintf(stderr, "not found\n");
12035 #endif
12036 			        goto not_end_of_int_subset;
12037 			    }
12038 			}
12039 #if 0
12040 			fprintf(stderr, "end of stream\n");
12041 #endif
12042 		        break;
12043 
12044 		    }
12045 not_end_of_int_subset:
12046                     continue; /* for */
12047 		}
12048 		/*
12049 		 * We didn't found the end of the Internal subset
12050 		 */
12051                 if (quote == 0)
12052                     ctxt->checkIndex = base;
12053                 else
12054                     ctxt->checkIndex = 0;
12055 #ifdef DEBUG_PUSH
12056 		if (next == 0)
12057 		    xmlGenericError(xmlGenericErrorContext,
12058 			    "PP: lookup of int subset end filed\n");
12059 #endif
12060 	        goto done;
12061 
12062 found_end_int_subset:
12063                 ctxt->checkIndex = 0;
12064 		xmlParseInternalSubset(ctxt);
12065 		if (ctxt->instate == XML_PARSER_EOF)
12066 		    goto done;
12067 		ctxt->inSubset = 2;
12068 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12069 		    (ctxt->sax->externalSubset != NULL))
12070 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12071 			    ctxt->extSubSystem, ctxt->extSubURI);
12072 		ctxt->inSubset = 0;
12073 		xmlCleanSpecialAttr(ctxt);
12074 		if (ctxt->instate == XML_PARSER_EOF)
12075 		    goto done;
12076 		ctxt->instate = XML_PARSER_PROLOG;
12077 		ctxt->checkIndex = 0;
12078 #ifdef DEBUG_PUSH
12079 		xmlGenericError(xmlGenericErrorContext,
12080 			"PP: entering PROLOG\n");
12081 #endif
12082                 break;
12083 	    }
12084             case XML_PARSER_COMMENT:
12085 		xmlGenericError(xmlGenericErrorContext,
12086 			"PP: internal error, state == COMMENT\n");
12087 		ctxt->instate = XML_PARSER_CONTENT;
12088 #ifdef DEBUG_PUSH
12089 		xmlGenericError(xmlGenericErrorContext,
12090 			"PP: entering CONTENT\n");
12091 #endif
12092 		break;
12093             case XML_PARSER_IGNORE:
12094 		xmlGenericError(xmlGenericErrorContext,
12095 			"PP: internal error, state == IGNORE");
12096 	        ctxt->instate = XML_PARSER_DTD;
12097 #ifdef DEBUG_PUSH
12098 		xmlGenericError(xmlGenericErrorContext,
12099 			"PP: entering DTD\n");
12100 #endif
12101 	        break;
12102             case XML_PARSER_PI:
12103 		xmlGenericError(xmlGenericErrorContext,
12104 			"PP: internal error, state == PI\n");
12105 		ctxt->instate = XML_PARSER_CONTENT;
12106 #ifdef DEBUG_PUSH
12107 		xmlGenericError(xmlGenericErrorContext,
12108 			"PP: entering CONTENT\n");
12109 #endif
12110 		break;
12111             case XML_PARSER_ENTITY_DECL:
12112 		xmlGenericError(xmlGenericErrorContext,
12113 			"PP: internal error, state == ENTITY_DECL\n");
12114 		ctxt->instate = XML_PARSER_DTD;
12115 #ifdef DEBUG_PUSH
12116 		xmlGenericError(xmlGenericErrorContext,
12117 			"PP: entering DTD\n");
12118 #endif
12119 		break;
12120             case XML_PARSER_ENTITY_VALUE:
12121 		xmlGenericError(xmlGenericErrorContext,
12122 			"PP: internal error, state == ENTITY_VALUE\n");
12123 		ctxt->instate = XML_PARSER_CONTENT;
12124 #ifdef DEBUG_PUSH
12125 		xmlGenericError(xmlGenericErrorContext,
12126 			"PP: entering DTD\n");
12127 #endif
12128 		break;
12129             case XML_PARSER_ATTRIBUTE_VALUE:
12130 		xmlGenericError(xmlGenericErrorContext,
12131 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12132 		ctxt->instate = XML_PARSER_START_TAG;
12133 #ifdef DEBUG_PUSH
12134 		xmlGenericError(xmlGenericErrorContext,
12135 			"PP: entering START_TAG\n");
12136 #endif
12137 		break;
12138             case XML_PARSER_SYSTEM_LITERAL:
12139 		xmlGenericError(xmlGenericErrorContext,
12140 			"PP: internal error, state == SYSTEM_LITERAL\n");
12141 		ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 		xmlGenericError(xmlGenericErrorContext,
12144 			"PP: entering START_TAG\n");
12145 #endif
12146 		break;
12147             case XML_PARSER_PUBLIC_LITERAL:
12148 		xmlGenericError(xmlGenericErrorContext,
12149 			"PP: internal error, state == PUBLIC_LITERAL\n");
12150 		ctxt->instate = XML_PARSER_START_TAG;
12151 #ifdef DEBUG_PUSH
12152 		xmlGenericError(xmlGenericErrorContext,
12153 			"PP: entering START_TAG\n");
12154 #endif
12155 		break;
12156 	}
12157     }
12158 done:
12159 #ifdef DEBUG_PUSH
12160     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12161 #endif
12162     return(ret);
12163 encoding_error:
12164     {
12165         char buffer[150];
12166 
12167 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168 			ctxt->input->cur[0], ctxt->input->cur[1],
12169 			ctxt->input->cur[2], ctxt->input->cur[3]);
12170 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12171 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12172 		     BAD_CAST buffer, NULL);
12173     }
12174     return(0);
12175 }
12176 
12177 /**
12178  * xmlParseCheckTransition:
12179  * @ctxt:  an XML parser context
12180  * @chunk:  a char array
12181  * @size:  the size in byte of the chunk
12182  *
12183  * Check depending on the current parser state if the chunk given must be
12184  * processed immediately or one need more data to advance on parsing.
12185  *
12186  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12187  */
12188 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12189 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12190     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12191         return(-1);
12192     if (ctxt->instate == XML_PARSER_START_TAG) {
12193         if (memchr(chunk, '>', size) != NULL)
12194             return(1);
12195         return(0);
12196     }
12197     if (ctxt->progressive == XML_PARSER_COMMENT) {
12198         if (memchr(chunk, '>', size) != NULL)
12199             return(1);
12200         return(0);
12201     }
12202     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12203         if (memchr(chunk, '>', size) != NULL)
12204             return(1);
12205         return(0);
12206     }
12207     if (ctxt->progressive == XML_PARSER_PI) {
12208         if (memchr(chunk, '>', size) != NULL)
12209             return(1);
12210         return(0);
12211     }
12212     if (ctxt->instate == XML_PARSER_END_TAG) {
12213         if (memchr(chunk, '>', size) != NULL)
12214             return(1);
12215         return(0);
12216     }
12217     if ((ctxt->progressive == XML_PARSER_DTD) ||
12218         (ctxt->instate == XML_PARSER_DTD)) {
12219         if (memchr(chunk, '>', size) != NULL)
12220             return(1);
12221         return(0);
12222     }
12223     return(1);
12224 }
12225 
12226 /**
12227  * xmlParseChunk:
12228  * @ctxt:  an XML parser context
12229  * @chunk:  an char array
12230  * @size:  the size in byte of the chunk
12231  * @terminate:  last chunk indicator
12232  *
12233  * Parse a Chunk of memory
12234  *
12235  * Returns zero if no error, the xmlParserErrors otherwise.
12236  */
12237 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12238 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12239               int terminate) {
12240     int end_in_lf = 0;
12241     int remain = 0;
12242     size_t old_avail = 0;
12243     size_t avail = 0;
12244 
12245     if (ctxt == NULL)
12246         return(XML_ERR_INTERNAL_ERROR);
12247     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12248         return(ctxt->errNo);
12249     if (ctxt->instate == XML_PARSER_EOF)
12250         return(-1);
12251     if (ctxt->instate == XML_PARSER_START)
12252         xmlDetectSAX2(ctxt);
12253     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12254         (chunk[size - 1] == '\r')) {
12255 	end_in_lf = 1;
12256 	size--;
12257     }
12258 
12259 xmldecl_done:
12260 
12261     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12262         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12263 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12264 	size_t cur = ctxt->input->cur - ctxt->input->base;
12265 	int res;
12266 
12267         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12268         /*
12269          * Specific handling if we autodetected an encoding, we should not
12270          * push more than the first line ... which depend on the encoding
12271          * And only push the rest once the final encoding was detected
12272          */
12273         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12274             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12275             unsigned int len = 45;
12276 
12277             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278                                BAD_CAST "UTF-16")) ||
12279                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280                                BAD_CAST "UTF16")))
12281                 len = 90;
12282             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283                                     BAD_CAST "UCS-4")) ||
12284                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285                                     BAD_CAST "UCS4")))
12286                 len = 180;
12287 
12288             if (ctxt->input->buf->rawconsumed < len)
12289                 len -= ctxt->input->buf->rawconsumed;
12290 
12291             /*
12292              * Change size for reading the initial declaration only
12293              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294              * will blindly copy extra bytes from memory.
12295              */
12296             if ((unsigned int) size > len) {
12297                 remain = size - len;
12298                 size = len;
12299             } else {
12300                 remain = 0;
12301             }
12302         }
12303 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12304         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12305 	if (res < 0) {
12306 	    ctxt->errNo = XML_PARSER_EOF;
12307 	    xmlHaltParser(ctxt);
12308 	    return (XML_PARSER_EOF);
12309 	}
12310 #ifdef DEBUG_PUSH
12311 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12312 #endif
12313 
12314     } else if (ctxt->instate != XML_PARSER_EOF) {
12315 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12316 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12317 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12318 		    (in->raw != NULL)) {
12319 		int nbchars;
12320 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12321 		size_t current = ctxt->input->cur - ctxt->input->base;
12322 
12323 		nbchars = xmlCharEncInput(in, terminate);
12324 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12325 		if (nbchars < 0) {
12326 		    /* TODO 2.6.0 */
12327 		    xmlGenericError(xmlGenericErrorContext,
12328 				    "xmlParseChunk: encoder error\n");
12329                     xmlHaltParser(ctxt);
12330 		    return(XML_ERR_INVALID_ENCODING);
12331 		}
12332 	    }
12333 	}
12334     }
12335     if (remain != 0) {
12336         xmlParseTryOrFinish(ctxt, 0);
12337     } else {
12338         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12339             avail = xmlBufUse(ctxt->input->buf->buffer);
12340         /*
12341          * Depending on the current state it may not be such
12342          * a good idea to try parsing if there is nothing in the chunk
12343          * which would be worth doing a parser state transition and we
12344          * need to wait for more data
12345          */
12346         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12347             (old_avail == 0) || (avail == 0) ||
12348             (xmlParseCheckTransition(ctxt,
12349                        (const char *)&ctxt->input->base[old_avail],
12350                                      avail - old_avail)))
12351             xmlParseTryOrFinish(ctxt, terminate);
12352     }
12353     if (ctxt->instate == XML_PARSER_EOF)
12354         return(ctxt->errNo);
12355 
12356     if ((ctxt->input != NULL) &&
12357          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12358          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12359         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12360         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12361         xmlHaltParser(ctxt);
12362     }
12363     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12364         return(ctxt->errNo);
12365 
12366     if (remain != 0) {
12367         chunk += size;
12368         size = remain;
12369         remain = 0;
12370         goto xmldecl_done;
12371     }
12372     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12373         (ctxt->input->buf != NULL)) {
12374 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12375 					 ctxt->input);
12376 	size_t current = ctxt->input->cur - ctxt->input->base;
12377 
12378 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12379 
12380 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12381 			      base, current);
12382     }
12383     if (terminate) {
12384 	/*
12385 	 * Check for termination
12386 	 */
12387 	int cur_avail = 0;
12388 
12389 	if (ctxt->input != NULL) {
12390 	    if (ctxt->input->buf == NULL)
12391 		cur_avail = ctxt->input->length -
12392 			    (ctxt->input->cur - ctxt->input->base);
12393 	    else
12394 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12395 			              (ctxt->input->cur - ctxt->input->base);
12396 	}
12397 
12398 	if ((ctxt->instate != XML_PARSER_EOF) &&
12399 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12400 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12401 	}
12402 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12403 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12404 	}
12405 	if (ctxt->instate != XML_PARSER_EOF) {
12406 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12407 		ctxt->sax->endDocument(ctxt->userData);
12408 	}
12409 	ctxt->instate = XML_PARSER_EOF;
12410     }
12411     if (ctxt->wellFormed == 0)
12412 	return((xmlParserErrors) ctxt->errNo);
12413     else
12414         return(0);
12415 }
12416 
12417 /************************************************************************
12418  *									*
12419  *		I/O front end functions to the parser			*
12420  *									*
12421  ************************************************************************/
12422 
12423 /**
12424  * xmlCreatePushParserCtxt:
12425  * @sax:  a SAX handler
12426  * @user_data:  The user data returned on SAX callbacks
12427  * @chunk:  a pointer to an array of chars
12428  * @size:  number of chars in the array
12429  * @filename:  an optional file name or URI
12430  *
12431  * Create a parser context for using the XML parser in push mode.
12432  * If @buffer and @size are non-NULL, the data is used to detect
12433  * the encoding.  The remaining characters will be parsed so they
12434  * don't need to be fed in again through xmlParseChunk.
12435  * To allow content encoding detection, @size should be >= 4
12436  * The value of @filename is used for fetching external entities
12437  * and error/warning reports.
12438  *
12439  * Returns the new parser context or NULL
12440  */
12441 
12442 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12443 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12444                         const char *chunk, int size, const char *filename) {
12445     xmlParserCtxtPtr ctxt;
12446     xmlParserInputPtr inputStream;
12447     xmlParserInputBufferPtr buf;
12448     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12449 
12450     /*
12451      * plug some encoding conversion routines
12452      */
12453     if ((chunk != NULL) && (size >= 4))
12454 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12455 
12456     buf = xmlAllocParserInputBuffer(enc);
12457     if (buf == NULL) return(NULL);
12458 
12459     ctxt = xmlNewParserCtxt();
12460     if (ctxt == NULL) {
12461         xmlErrMemory(NULL, "creating parser: out of memory\n");
12462 	xmlFreeParserInputBuffer(buf);
12463 	return(NULL);
12464     }
12465     ctxt->dictNames = 1;
12466     if (sax != NULL) {
12467 #ifdef LIBXML_SAX1_ENABLED
12468 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12469 #endif /* LIBXML_SAX1_ENABLED */
12470 	    xmlFree(ctxt->sax);
12471 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12472 	if (ctxt->sax == NULL) {
12473 	    xmlErrMemory(ctxt, NULL);
12474 	    xmlFreeParserInputBuffer(buf);
12475 	    xmlFreeParserCtxt(ctxt);
12476 	    return(NULL);
12477 	}
12478 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12479 	if (sax->initialized == XML_SAX2_MAGIC)
12480 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12481 	else
12482 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12483 	if (user_data != NULL)
12484 	    ctxt->userData = user_data;
12485     }
12486     if (filename == NULL) {
12487 	ctxt->directory = NULL;
12488     } else {
12489         ctxt->directory = xmlParserGetDirectory(filename);
12490     }
12491 
12492     inputStream = xmlNewInputStream(ctxt);
12493     if (inputStream == NULL) {
12494 	xmlFreeParserCtxt(ctxt);
12495 	xmlFreeParserInputBuffer(buf);
12496 	return(NULL);
12497     }
12498 
12499     if (filename == NULL)
12500 	inputStream->filename = NULL;
12501     else {
12502 	inputStream->filename = (char *)
12503 	    xmlCanonicPath((const xmlChar *) filename);
12504 	if (inputStream->filename == NULL) {
12505 	    xmlFreeParserCtxt(ctxt);
12506 	    xmlFreeParserInputBuffer(buf);
12507 	    return(NULL);
12508 	}
12509     }
12510     inputStream->buf = buf;
12511     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12512     inputPush(ctxt, inputStream);
12513 
12514     /*
12515      * If the caller didn't provide an initial 'chunk' for determining
12516      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517      * that it can be automatically determined later
12518      */
12519     if ((size == 0) || (chunk == NULL)) {
12520 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12521     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12522 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12523 	size_t cur = ctxt->input->cur - ctxt->input->base;
12524 
12525 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12526 
12527         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12528 #ifdef DEBUG_PUSH
12529 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12530 #endif
12531     }
12532 
12533     if (enc != XML_CHAR_ENCODING_NONE) {
12534         xmlSwitchEncoding(ctxt, enc);
12535     }
12536 
12537     return(ctxt);
12538 }
12539 #endif /* LIBXML_PUSH_ENABLED */
12540 
12541 /**
12542  * xmlHaltParser:
12543  * @ctxt:  an XML parser context
12544  *
12545  * Blocks further parser processing don't override error
12546  * for internal use
12547  */
12548 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12549 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12550     if (ctxt == NULL)
12551         return;
12552     ctxt->instate = XML_PARSER_EOF;
12553     ctxt->disableSAX = 1;
12554     while (ctxt->inputNr > 1)
12555         xmlFreeInputStream(inputPop(ctxt));
12556     if (ctxt->input != NULL) {
12557         /*
12558 	 * in case there was a specific allocation deallocate before
12559 	 * overriding base
12560 	 */
12561         if (ctxt->input->free != NULL) {
12562 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12563 	    ctxt->input->free = NULL;
12564 	}
12565         if (ctxt->input->buf != NULL) {
12566             xmlFreeParserInputBuffer(ctxt->input->buf);
12567             ctxt->input->buf = NULL;
12568         }
12569 	ctxt->input->cur = BAD_CAST"";
12570         ctxt->input->length = 0;
12571 	ctxt->input->base = ctxt->input->cur;
12572         ctxt->input->end = ctxt->input->cur;
12573     }
12574 }
12575 
12576 /**
12577  * xmlStopParser:
12578  * @ctxt:  an XML parser context
12579  *
12580  * Blocks further parser processing
12581  */
12582 void
xmlStopParser(xmlParserCtxtPtr ctxt)12583 xmlStopParser(xmlParserCtxtPtr ctxt) {
12584     if (ctxt == NULL)
12585         return;
12586     xmlHaltParser(ctxt);
12587     ctxt->errNo = XML_ERR_USER_STOP;
12588 }
12589 
12590 /**
12591  * xmlCreateIOParserCtxt:
12592  * @sax:  a SAX handler
12593  * @user_data:  The user data returned on SAX callbacks
12594  * @ioread:  an I/O read function
12595  * @ioclose:  an I/O close function
12596  * @ioctx:  an I/O handler
12597  * @enc:  the charset encoding if known
12598  *
12599  * Create a parser context for using the XML parser with an existing
12600  * I/O stream
12601  *
12602  * Returns the new parser context or NULL
12603  */
12604 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12605 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12606 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12607 	void *ioctx, xmlCharEncoding enc) {
12608     xmlParserCtxtPtr ctxt;
12609     xmlParserInputPtr inputStream;
12610     xmlParserInputBufferPtr buf;
12611 
12612     if (ioread == NULL) return(NULL);
12613 
12614     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12615     if (buf == NULL) {
12616         if (ioclose != NULL)
12617             ioclose(ioctx);
12618         return (NULL);
12619     }
12620 
12621     ctxt = xmlNewParserCtxt();
12622     if (ctxt == NULL) {
12623 	xmlFreeParserInputBuffer(buf);
12624 	return(NULL);
12625     }
12626     if (sax != NULL) {
12627 #ifdef LIBXML_SAX1_ENABLED
12628 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12629 #endif /* LIBXML_SAX1_ENABLED */
12630 	    xmlFree(ctxt->sax);
12631 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12632 	if (ctxt->sax == NULL) {
12633 	    xmlFreeParserInputBuffer(buf);
12634 	    xmlErrMemory(ctxt, NULL);
12635 	    xmlFreeParserCtxt(ctxt);
12636 	    return(NULL);
12637 	}
12638 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12639 	if (sax->initialized == XML_SAX2_MAGIC)
12640 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12641 	else
12642 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12643 	if (user_data != NULL)
12644 	    ctxt->userData = user_data;
12645     }
12646 
12647     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12648     if (inputStream == NULL) {
12649 	xmlFreeParserCtxt(ctxt);
12650 	return(NULL);
12651     }
12652     inputPush(ctxt, inputStream);
12653 
12654     return(ctxt);
12655 }
12656 
12657 #ifdef LIBXML_VALID_ENABLED
12658 /************************************************************************
12659  *									*
12660  *		Front ends when parsing a DTD				*
12661  *									*
12662  ************************************************************************/
12663 
12664 /**
12665  * xmlIOParseDTD:
12666  * @sax:  the SAX handler block or NULL
12667  * @input:  an Input Buffer
12668  * @enc:  the charset encoding if known
12669  *
12670  * Load and parse a DTD
12671  *
12672  * Returns the resulting xmlDtdPtr or NULL in case of error.
12673  * @input will be freed by the function in any case.
12674  */
12675 
12676 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12677 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12678 	      xmlCharEncoding enc) {
12679     xmlDtdPtr ret = NULL;
12680     xmlParserCtxtPtr ctxt;
12681     xmlParserInputPtr pinput = NULL;
12682     xmlChar start[4];
12683 
12684     if (input == NULL)
12685 	return(NULL);
12686 
12687     ctxt = xmlNewParserCtxt();
12688     if (ctxt == NULL) {
12689         xmlFreeParserInputBuffer(input);
12690 	return(NULL);
12691     }
12692 
12693     /* We are loading a DTD */
12694     ctxt->options |= XML_PARSE_DTDLOAD;
12695 
12696     /*
12697      * Set-up the SAX context
12698      */
12699     if (sax != NULL) {
12700 	if (ctxt->sax != NULL)
12701 	    xmlFree(ctxt->sax);
12702         ctxt->sax = sax;
12703         ctxt->userData = ctxt;
12704     }
12705     xmlDetectSAX2(ctxt);
12706 
12707     /*
12708      * generate a parser input from the I/O handler
12709      */
12710 
12711     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12712     if (pinput == NULL) {
12713         if (sax != NULL) ctxt->sax = NULL;
12714         xmlFreeParserInputBuffer(input);
12715 	xmlFreeParserCtxt(ctxt);
12716 	return(NULL);
12717     }
12718 
12719     /*
12720      * plug some encoding conversion routines here.
12721      */
12722     if (xmlPushInput(ctxt, pinput) < 0) {
12723         if (sax != NULL) ctxt->sax = NULL;
12724 	xmlFreeParserCtxt(ctxt);
12725 	return(NULL);
12726     }
12727     if (enc != XML_CHAR_ENCODING_NONE) {
12728         xmlSwitchEncoding(ctxt, enc);
12729     }
12730 
12731     pinput->filename = NULL;
12732     pinput->line = 1;
12733     pinput->col = 1;
12734     pinput->base = ctxt->input->cur;
12735     pinput->cur = ctxt->input->cur;
12736     pinput->free = NULL;
12737 
12738     /*
12739      * let's parse that entity knowing it's an external subset.
12740      */
12741     ctxt->inSubset = 2;
12742     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12743     if (ctxt->myDoc == NULL) {
12744 	xmlErrMemory(ctxt, "New Doc failed");
12745 	return(NULL);
12746     }
12747     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12748     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12749 	                               BAD_CAST "none", BAD_CAST "none");
12750 
12751     if ((enc == XML_CHAR_ENCODING_NONE) &&
12752         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12753 	/*
12754 	 * Get the 4 first bytes and decode the charset
12755 	 * if enc != XML_CHAR_ENCODING_NONE
12756 	 * plug some encoding conversion routines.
12757 	 */
12758 	start[0] = RAW;
12759 	start[1] = NXT(1);
12760 	start[2] = NXT(2);
12761 	start[3] = NXT(3);
12762 	enc = xmlDetectCharEncoding(start, 4);
12763 	if (enc != XML_CHAR_ENCODING_NONE) {
12764 	    xmlSwitchEncoding(ctxt, enc);
12765 	}
12766     }
12767 
12768     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12769 
12770     if (ctxt->myDoc != NULL) {
12771 	if (ctxt->wellFormed) {
12772 	    ret = ctxt->myDoc->extSubset;
12773 	    ctxt->myDoc->extSubset = NULL;
12774 	    if (ret != NULL) {
12775 		xmlNodePtr tmp;
12776 
12777 		ret->doc = NULL;
12778 		tmp = ret->children;
12779 		while (tmp != NULL) {
12780 		    tmp->doc = NULL;
12781 		    tmp = tmp->next;
12782 		}
12783 	    }
12784 	} else {
12785 	    ret = NULL;
12786 	}
12787         xmlFreeDoc(ctxt->myDoc);
12788         ctxt->myDoc = NULL;
12789     }
12790     if (sax != NULL) ctxt->sax = NULL;
12791     xmlFreeParserCtxt(ctxt);
12792 
12793     return(ret);
12794 }
12795 
12796 /**
12797  * xmlSAXParseDTD:
12798  * @sax:  the SAX handler block
12799  * @ExternalID:  a NAME* containing the External ID of the DTD
12800  * @SystemID:  a NAME* containing the URL to the DTD
12801  *
12802  * Load and parse an external subset.
12803  *
12804  * Returns the resulting xmlDtdPtr or NULL in case of error.
12805  */
12806 
12807 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12808 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12809                           const xmlChar *SystemID) {
12810     xmlDtdPtr ret = NULL;
12811     xmlParserCtxtPtr ctxt;
12812     xmlParserInputPtr input = NULL;
12813     xmlCharEncoding enc;
12814     xmlChar* systemIdCanonic;
12815 
12816     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12817 
12818     ctxt = xmlNewParserCtxt();
12819     if (ctxt == NULL) {
12820 	return(NULL);
12821     }
12822 
12823     /* We are loading a DTD */
12824     ctxt->options |= XML_PARSE_DTDLOAD;
12825 
12826     /*
12827      * Set-up the SAX context
12828      */
12829     if (sax != NULL) {
12830 	if (ctxt->sax != NULL)
12831 	    xmlFree(ctxt->sax);
12832         ctxt->sax = sax;
12833         ctxt->userData = ctxt;
12834     }
12835 
12836     /*
12837      * Canonicalise the system ID
12838      */
12839     systemIdCanonic = xmlCanonicPath(SystemID);
12840     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12841 	xmlFreeParserCtxt(ctxt);
12842 	return(NULL);
12843     }
12844 
12845     /*
12846      * Ask the Entity resolver to load the damn thing
12847      */
12848 
12849     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12850 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12851 	                                 systemIdCanonic);
12852     if (input == NULL) {
12853         if (sax != NULL) ctxt->sax = NULL;
12854 	xmlFreeParserCtxt(ctxt);
12855 	if (systemIdCanonic != NULL)
12856 	    xmlFree(systemIdCanonic);
12857 	return(NULL);
12858     }
12859 
12860     /*
12861      * plug some encoding conversion routines here.
12862      */
12863     if (xmlPushInput(ctxt, input) < 0) {
12864         if (sax != NULL) ctxt->sax = NULL;
12865 	xmlFreeParserCtxt(ctxt);
12866 	if (systemIdCanonic != NULL)
12867 	    xmlFree(systemIdCanonic);
12868 	return(NULL);
12869     }
12870     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12871 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12872 	xmlSwitchEncoding(ctxt, enc);
12873     }
12874 
12875     if (input->filename == NULL)
12876 	input->filename = (char *) systemIdCanonic;
12877     else
12878 	xmlFree(systemIdCanonic);
12879     input->line = 1;
12880     input->col = 1;
12881     input->base = ctxt->input->cur;
12882     input->cur = ctxt->input->cur;
12883     input->free = NULL;
12884 
12885     /*
12886      * let's parse that entity knowing it's an external subset.
12887      */
12888     ctxt->inSubset = 2;
12889     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12890     if (ctxt->myDoc == NULL) {
12891 	xmlErrMemory(ctxt, "New Doc failed");
12892         if (sax != NULL) ctxt->sax = NULL;
12893 	xmlFreeParserCtxt(ctxt);
12894 	return(NULL);
12895     }
12896     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12897     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12898 	                               ExternalID, SystemID);
12899     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12900 
12901     if (ctxt->myDoc != NULL) {
12902 	if (ctxt->wellFormed) {
12903 	    ret = ctxt->myDoc->extSubset;
12904 	    ctxt->myDoc->extSubset = NULL;
12905 	    if (ret != NULL) {
12906 		xmlNodePtr tmp;
12907 
12908 		ret->doc = NULL;
12909 		tmp = ret->children;
12910 		while (tmp != NULL) {
12911 		    tmp->doc = NULL;
12912 		    tmp = tmp->next;
12913 		}
12914 	    }
12915 	} else {
12916 	    ret = NULL;
12917 	}
12918         xmlFreeDoc(ctxt->myDoc);
12919         ctxt->myDoc = NULL;
12920     }
12921     if (sax != NULL) ctxt->sax = NULL;
12922     xmlFreeParserCtxt(ctxt);
12923 
12924     return(ret);
12925 }
12926 
12927 
12928 /**
12929  * xmlParseDTD:
12930  * @ExternalID:  a NAME* containing the External ID of the DTD
12931  * @SystemID:  a NAME* containing the URL to the DTD
12932  *
12933  * Load and parse an external subset.
12934  *
12935  * Returns the resulting xmlDtdPtr or NULL in case of error.
12936  */
12937 
12938 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12939 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12940     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12941 }
12942 #endif /* LIBXML_VALID_ENABLED */
12943 
12944 /************************************************************************
12945  *									*
12946  *		Front ends when parsing an Entity			*
12947  *									*
12948  ************************************************************************/
12949 
12950 /**
12951  * xmlParseCtxtExternalEntity:
12952  * @ctx:  the existing parsing context
12953  * @URL:  the URL for the entity to load
12954  * @ID:  the System ID for the entity to load
12955  * @lst:  the return value for the set of parsed nodes
12956  *
12957  * Parse an external general entity within an existing parsing context
12958  * An external general parsed entity is well-formed if it matches the
12959  * production labeled extParsedEnt.
12960  *
12961  * [78] extParsedEnt ::= TextDecl? content
12962  *
12963  * Returns 0 if the entity is well formed, -1 in case of args problem and
12964  *    the parser error code otherwise
12965  */
12966 
12967 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12969 	               const xmlChar *ID, xmlNodePtr *lst) {
12970     void *userData;
12971 
12972     if (ctx == NULL) return(-1);
12973     /*
12974      * If the user provided their own SAX callbacks, then reuse the
12975      * userData callback field, otherwise the expected setup in a
12976      * DOM builder is to have userData == ctxt
12977      */
12978     if (ctx->userData == ctx)
12979         userData = NULL;
12980     else
12981         userData = ctx->userData;
12982     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12983                                          userData, ctx->depth + 1,
12984                                          URL, ID, lst);
12985 }
12986 
12987 /**
12988  * xmlParseExternalEntityPrivate:
12989  * @doc:  the document the chunk pertains to
12990  * @oldctxt:  the previous parser context if available
12991  * @sax:  the SAX handler block (possibly NULL)
12992  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12993  * @depth:  Used for loop detection, use 0
12994  * @URL:  the URL for the entity to load
12995  * @ID:  the System ID for the entity to load
12996  * @list:  the return value for the set of parsed nodes
12997  *
12998  * Private version of xmlParseExternalEntity()
12999  *
13000  * Returns 0 if the entity is well formed, -1 in case of args problem and
13001  *    the parser error code otherwise
13002  */
13003 
13004 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13005 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13006 	              xmlSAXHandlerPtr sax,
13007 		      void *user_data, int depth, const xmlChar *URL,
13008 		      const xmlChar *ID, xmlNodePtr *list) {
13009     xmlParserCtxtPtr ctxt;
13010     xmlDocPtr newDoc;
13011     xmlNodePtr newRoot;
13012     xmlSAXHandlerPtr oldsax = NULL;
13013     xmlParserErrors ret = XML_ERR_OK;
13014     xmlChar start[4];
13015     xmlCharEncoding enc;
13016 
13017     if (((depth > 40) &&
13018 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13019 	(depth > 1024)) {
13020 	return(XML_ERR_ENTITY_LOOP);
13021     }
13022 
13023     if (list != NULL)
13024         *list = NULL;
13025     if ((URL == NULL) && (ID == NULL))
13026 	return(XML_ERR_INTERNAL_ERROR);
13027     if (doc == NULL)
13028 	return(XML_ERR_INTERNAL_ERROR);
13029 
13030 
13031     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13032     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13033     ctxt->userData = ctxt;
13034     if (sax != NULL) {
13035 	oldsax = ctxt->sax;
13036         ctxt->sax = sax;
13037 	if (user_data != NULL)
13038 	    ctxt->userData = user_data;
13039     }
13040     xmlDetectSAX2(ctxt);
13041     newDoc = xmlNewDoc(BAD_CAST "1.0");
13042     if (newDoc == NULL) {
13043 	xmlFreeParserCtxt(ctxt);
13044 	return(XML_ERR_INTERNAL_ERROR);
13045     }
13046     newDoc->properties = XML_DOC_INTERNAL;
13047     if (doc) {
13048         newDoc->intSubset = doc->intSubset;
13049         newDoc->extSubset = doc->extSubset;
13050         if (doc->dict) {
13051             newDoc->dict = doc->dict;
13052             xmlDictReference(newDoc->dict);
13053         }
13054         if (doc->URL != NULL) {
13055             newDoc->URL = xmlStrdup(doc->URL);
13056         }
13057     }
13058     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13059     if (newRoot == NULL) {
13060 	if (sax != NULL)
13061 	    ctxt->sax = oldsax;
13062 	xmlFreeParserCtxt(ctxt);
13063 	newDoc->intSubset = NULL;
13064 	newDoc->extSubset = NULL;
13065         xmlFreeDoc(newDoc);
13066 	return(XML_ERR_INTERNAL_ERROR);
13067     }
13068     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13069     nodePush(ctxt, newDoc->children);
13070     if (doc == NULL) {
13071         ctxt->myDoc = newDoc;
13072     } else {
13073         ctxt->myDoc = doc;
13074         newRoot->doc = doc;
13075     }
13076 
13077     /*
13078      * Get the 4 first bytes and decode the charset
13079      * if enc != XML_CHAR_ENCODING_NONE
13080      * plug some encoding conversion routines.
13081      */
13082     GROW;
13083     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13084 	start[0] = RAW;
13085 	start[1] = NXT(1);
13086 	start[2] = NXT(2);
13087 	start[3] = NXT(3);
13088 	enc = xmlDetectCharEncoding(start, 4);
13089 	if (enc != XML_CHAR_ENCODING_NONE) {
13090 	    xmlSwitchEncoding(ctxt, enc);
13091 	}
13092     }
13093 
13094     /*
13095      * Parse a possible text declaration first
13096      */
13097     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13098 	xmlParseTextDecl(ctxt);
13099         /*
13100          * An XML-1.0 document can't reference an entity not XML-1.0
13101          */
13102         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13103             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13104             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13105                            "Version mismatch between document and entity\n");
13106         }
13107     }
13108 
13109     ctxt->instate = XML_PARSER_CONTENT;
13110     ctxt->depth = depth;
13111     if (oldctxt != NULL) {
13112 	ctxt->_private = oldctxt->_private;
13113 	ctxt->loadsubset = oldctxt->loadsubset;
13114 	ctxt->validate = oldctxt->validate;
13115 	ctxt->valid = oldctxt->valid;
13116 	ctxt->replaceEntities = oldctxt->replaceEntities;
13117         if (oldctxt->validate) {
13118             ctxt->vctxt.error = oldctxt->vctxt.error;
13119             ctxt->vctxt.warning = oldctxt->vctxt.warning;
13120             ctxt->vctxt.userData = oldctxt->vctxt.userData;
13121         }
13122 	ctxt->external = oldctxt->external;
13123         if (ctxt->dict) xmlDictFree(ctxt->dict);
13124         ctxt->dict = oldctxt->dict;
13125         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13126         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13127         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13128         ctxt->dictNames = oldctxt->dictNames;
13129         ctxt->attsDefault = oldctxt->attsDefault;
13130         ctxt->attsSpecial = oldctxt->attsSpecial;
13131         ctxt->linenumbers = oldctxt->linenumbers;
13132 	ctxt->record_info = oldctxt->record_info;
13133 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13134 	ctxt->node_seq.length = oldctxt->node_seq.length;
13135 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13136     } else {
13137 	/*
13138 	 * Doing validity checking on chunk without context
13139 	 * doesn't make sense
13140 	 */
13141 	ctxt->_private = NULL;
13142 	ctxt->validate = 0;
13143 	ctxt->external = 2;
13144 	ctxt->loadsubset = 0;
13145     }
13146 
13147     xmlParseContent(ctxt);
13148 
13149     if ((RAW == '<') && (NXT(1) == '/')) {
13150 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13151     } else if (RAW != 0) {
13152 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13153     }
13154     if (ctxt->node != newDoc->children) {
13155 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156     }
13157 
13158     if (!ctxt->wellFormed) {
13159         if (ctxt->errNo == 0)
13160 	    ret = XML_ERR_INTERNAL_ERROR;
13161 	else
13162 	    ret = (xmlParserErrors)ctxt->errNo;
13163     } else {
13164 	if (list != NULL) {
13165 	    xmlNodePtr cur;
13166 
13167 	    /*
13168 	     * Return the newly created nodeset after unlinking it from
13169 	     * they pseudo parent.
13170 	     */
13171 	    cur = newDoc->children->children;
13172 	    *list = cur;
13173 	    while (cur != NULL) {
13174 		cur->parent = NULL;
13175 		cur = cur->next;
13176 	    }
13177             newDoc->children->children = NULL;
13178 	}
13179 	ret = XML_ERR_OK;
13180     }
13181 
13182     /*
13183      * Record in the parent context the number of entities replacement
13184      * done when parsing that reference.
13185      */
13186     if (oldctxt != NULL)
13187         oldctxt->nbentities += ctxt->nbentities;
13188 
13189     /*
13190      * Also record the size of the entity parsed
13191      */
13192     if (ctxt->input != NULL && oldctxt != NULL) {
13193 	oldctxt->sizeentities += ctxt->input->consumed;
13194 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13195     }
13196     /*
13197      * And record the last error if any
13198      */
13199     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13200         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13201 
13202     if (sax != NULL)
13203 	ctxt->sax = oldsax;
13204     if (oldctxt != NULL) {
13205         ctxt->dict = NULL;
13206         ctxt->attsDefault = NULL;
13207         ctxt->attsSpecial = NULL;
13208         oldctxt->validate = ctxt->validate;
13209         oldctxt->valid = ctxt->valid;
13210         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13211         oldctxt->node_seq.length = ctxt->node_seq.length;
13212         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13213     }
13214     ctxt->node_seq.maximum = 0;
13215     ctxt->node_seq.length = 0;
13216     ctxt->node_seq.buffer = NULL;
13217     xmlFreeParserCtxt(ctxt);
13218     newDoc->intSubset = NULL;
13219     newDoc->extSubset = NULL;
13220     xmlFreeDoc(newDoc);
13221 
13222     return(ret);
13223 }
13224 
13225 #ifdef LIBXML_SAX1_ENABLED
13226 /**
13227  * xmlParseExternalEntity:
13228  * @doc:  the document the chunk pertains to
13229  * @sax:  the SAX handler block (possibly NULL)
13230  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13231  * @depth:  Used for loop detection, use 0
13232  * @URL:  the URL for the entity to load
13233  * @ID:  the System ID for the entity to load
13234  * @lst:  the return value for the set of parsed nodes
13235  *
13236  * Parse an external general entity
13237  * An external general parsed entity is well-formed if it matches the
13238  * production labeled extParsedEnt.
13239  *
13240  * [78] extParsedEnt ::= TextDecl? content
13241  *
13242  * Returns 0 if the entity is well formed, -1 in case of args problem and
13243  *    the parser error code otherwise
13244  */
13245 
13246 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13247 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13248 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13249     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13250 		                       ID, lst));
13251 }
13252 
13253 /**
13254  * xmlParseBalancedChunkMemory:
13255  * @doc:  the document the chunk pertains to (must not be NULL)
13256  * @sax:  the SAX handler block (possibly NULL)
13257  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13258  * @depth:  Used for loop detection, use 0
13259  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13260  * @lst:  the return value for the set of parsed nodes
13261  *
13262  * Parse a well-balanced chunk of an XML document
13263  * called by the parser
13264  * The allowed sequence for the Well Balanced Chunk is the one defined by
13265  * the content production in the XML grammar:
13266  *
13267  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13268  *
13269  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270  *    the parser error code otherwise
13271  */
13272 
13273 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13274 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13275      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13276     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13277                                                 depth, string, lst, 0 );
13278 }
13279 #endif /* LIBXML_SAX1_ENABLED */
13280 
13281 /**
13282  * xmlParseBalancedChunkMemoryInternal:
13283  * @oldctxt:  the existing parsing context
13284  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13285  * @user_data:  the user data field for the parser context
13286  * @lst:  the return value for the set of parsed nodes
13287  *
13288  *
13289  * Parse a well-balanced chunk of an XML document
13290  * called by the parser
13291  * The allowed sequence for the Well Balanced Chunk is the one defined by
13292  * the content production in the XML grammar:
13293  *
13294  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13295  *
13296  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297  * error code otherwise
13298  *
13299  * In case recover is set to 1, the nodelist will not be empty even if
13300  * the parsed chunk is not well balanced.
13301  */
13302 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13303 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13304 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13305     xmlParserCtxtPtr ctxt;
13306     xmlDocPtr newDoc = NULL;
13307     xmlNodePtr newRoot;
13308     xmlSAXHandlerPtr oldsax = NULL;
13309     xmlNodePtr content = NULL;
13310     xmlNodePtr last = NULL;
13311     int size;
13312     xmlParserErrors ret = XML_ERR_OK;
13313 #ifdef SAX2
13314     int i;
13315 #endif
13316 
13317     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13318         (oldctxt->depth >  1024)) {
13319 	return(XML_ERR_ENTITY_LOOP);
13320     }
13321 
13322 
13323     if (lst != NULL)
13324         *lst = NULL;
13325     if (string == NULL)
13326         return(XML_ERR_INTERNAL_ERROR);
13327 
13328     size = xmlStrlen(string);
13329 
13330     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13331     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13332     if (user_data != NULL)
13333 	ctxt->userData = user_data;
13334     else
13335 	ctxt->userData = ctxt;
13336     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13337     ctxt->dict = oldctxt->dict;
13338     ctxt->input_id = oldctxt->input_id + 1;
13339     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13340     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13341     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13342 
13343 #ifdef SAX2
13344     /* propagate namespaces down the entity */
13345     for (i = 0;i < oldctxt->nsNr;i += 2) {
13346         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13347     }
13348 #endif
13349 
13350     oldsax = ctxt->sax;
13351     ctxt->sax = oldctxt->sax;
13352     xmlDetectSAX2(ctxt);
13353     ctxt->replaceEntities = oldctxt->replaceEntities;
13354     ctxt->options = oldctxt->options;
13355 
13356     ctxt->_private = oldctxt->_private;
13357     if (oldctxt->myDoc == NULL) {
13358 	newDoc = xmlNewDoc(BAD_CAST "1.0");
13359 	if (newDoc == NULL) {
13360 	    ctxt->sax = oldsax;
13361 	    ctxt->dict = NULL;
13362 	    xmlFreeParserCtxt(ctxt);
13363 	    return(XML_ERR_INTERNAL_ERROR);
13364 	}
13365 	newDoc->properties = XML_DOC_INTERNAL;
13366 	newDoc->dict = ctxt->dict;
13367 	xmlDictReference(newDoc->dict);
13368 	ctxt->myDoc = newDoc;
13369     } else {
13370 	ctxt->myDoc = oldctxt->myDoc;
13371         content = ctxt->myDoc->children;
13372 	last = ctxt->myDoc->last;
13373     }
13374     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13375     if (newRoot == NULL) {
13376 	ctxt->sax = oldsax;
13377 	ctxt->dict = NULL;
13378 	xmlFreeParserCtxt(ctxt);
13379 	if (newDoc != NULL) {
13380 	    xmlFreeDoc(newDoc);
13381 	}
13382 	return(XML_ERR_INTERNAL_ERROR);
13383     }
13384     ctxt->myDoc->children = NULL;
13385     ctxt->myDoc->last = NULL;
13386     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13387     nodePush(ctxt, ctxt->myDoc->children);
13388     ctxt->instate = XML_PARSER_CONTENT;
13389     ctxt->depth = oldctxt->depth + 1;
13390 
13391     ctxt->validate = 0;
13392     ctxt->loadsubset = oldctxt->loadsubset;
13393     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13394 	/*
13395 	 * ID/IDREF registration will be done in xmlValidateElement below
13396 	 */
13397 	ctxt->loadsubset |= XML_SKIP_IDS;
13398     }
13399     ctxt->dictNames = oldctxt->dictNames;
13400     ctxt->attsDefault = oldctxt->attsDefault;
13401     ctxt->attsSpecial = oldctxt->attsSpecial;
13402 
13403     xmlParseContent(ctxt);
13404     if ((RAW == '<') && (NXT(1) == '/')) {
13405 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13406     } else if (RAW != 0) {
13407 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13408     }
13409     if (ctxt->node != ctxt->myDoc->children) {
13410 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13411     }
13412 
13413     if (!ctxt->wellFormed) {
13414         if (ctxt->errNo == 0)
13415 	    ret = XML_ERR_INTERNAL_ERROR;
13416 	else
13417 	    ret = (xmlParserErrors)ctxt->errNo;
13418     } else {
13419       ret = XML_ERR_OK;
13420     }
13421 
13422     if ((lst != NULL) && (ret == XML_ERR_OK)) {
13423 	xmlNodePtr cur;
13424 
13425 	/*
13426 	 * Return the newly created nodeset after unlinking it from
13427 	 * they pseudo parent.
13428 	 */
13429 	cur = ctxt->myDoc->children->children;
13430 	*lst = cur;
13431 	while (cur != NULL) {
13432 #ifdef LIBXML_VALID_ENABLED
13433 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13434 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13435 		(cur->type == XML_ELEMENT_NODE)) {
13436 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13437 			oldctxt->myDoc, cur);
13438 	    }
13439 #endif /* LIBXML_VALID_ENABLED */
13440 	    cur->parent = NULL;
13441 	    cur = cur->next;
13442 	}
13443 	ctxt->myDoc->children->children = NULL;
13444     }
13445     if (ctxt->myDoc != NULL) {
13446 	xmlFreeNode(ctxt->myDoc->children);
13447         ctxt->myDoc->children = content;
13448         ctxt->myDoc->last = last;
13449     }
13450 
13451     /*
13452      * Record in the parent context the number of entities replacement
13453      * done when parsing that reference.
13454      */
13455     if (oldctxt != NULL)
13456         oldctxt->nbentities += ctxt->nbentities;
13457 
13458     /*
13459      * Also record the last error if any
13460      */
13461     if (ctxt->lastError.code != XML_ERR_OK)
13462         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13463 
13464     ctxt->sax = oldsax;
13465     ctxt->dict = NULL;
13466     ctxt->attsDefault = NULL;
13467     ctxt->attsSpecial = NULL;
13468     xmlFreeParserCtxt(ctxt);
13469     if (newDoc != NULL) {
13470 	xmlFreeDoc(newDoc);
13471     }
13472 
13473     return(ret);
13474 }
13475 
13476 /**
13477  * xmlParseInNodeContext:
13478  * @node:  the context node
13479  * @data:  the input string
13480  * @datalen:  the input string length in bytes
13481  * @options:  a combination of xmlParserOption
13482  * @lst:  the return value for the set of parsed nodes
13483  *
13484  * Parse a well-balanced chunk of an XML document
13485  * within the context (DTD, namespaces, etc ...) of the given node.
13486  *
13487  * The allowed sequence for the data is a Well Balanced Chunk defined by
13488  * the content production in the XML grammar:
13489  *
13490  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13491  *
13492  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493  * error code otherwise
13494  */
13495 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13496 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13497                       int options, xmlNodePtr *lst) {
13498 #ifdef SAX2
13499     xmlParserCtxtPtr ctxt;
13500     xmlDocPtr doc = NULL;
13501     xmlNodePtr fake, cur;
13502     int nsnr = 0;
13503 
13504     xmlParserErrors ret = XML_ERR_OK;
13505 
13506     /*
13507      * check all input parameters, grab the document
13508      */
13509     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13510         return(XML_ERR_INTERNAL_ERROR);
13511     switch (node->type) {
13512         case XML_ELEMENT_NODE:
13513         case XML_ATTRIBUTE_NODE:
13514         case XML_TEXT_NODE:
13515         case XML_CDATA_SECTION_NODE:
13516         case XML_ENTITY_REF_NODE:
13517         case XML_PI_NODE:
13518         case XML_COMMENT_NODE:
13519         case XML_DOCUMENT_NODE:
13520         case XML_HTML_DOCUMENT_NODE:
13521 	    break;
13522 	default:
13523 	    return(XML_ERR_INTERNAL_ERROR);
13524 
13525     }
13526     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13527            (node->type != XML_DOCUMENT_NODE) &&
13528 	   (node->type != XML_HTML_DOCUMENT_NODE))
13529 	node = node->parent;
13530     if (node == NULL)
13531 	return(XML_ERR_INTERNAL_ERROR);
13532     if (node->type == XML_ELEMENT_NODE)
13533 	doc = node->doc;
13534     else
13535         doc = (xmlDocPtr) node;
13536     if (doc == NULL)
13537 	return(XML_ERR_INTERNAL_ERROR);
13538 
13539     /*
13540      * allocate a context and set-up everything not related to the
13541      * node position in the tree
13542      */
13543     if (doc->type == XML_DOCUMENT_NODE)
13544 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13545 #ifdef LIBXML_HTML_ENABLED
13546     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13547 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13548         /*
13549          * When parsing in context, it makes no sense to add implied
13550          * elements like html/body/etc...
13551          */
13552         options |= HTML_PARSE_NOIMPLIED;
13553     }
13554 #endif
13555     else
13556         return(XML_ERR_INTERNAL_ERROR);
13557 
13558     if (ctxt == NULL)
13559         return(XML_ERR_NO_MEMORY);
13560 
13561     /*
13562      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564      * we must wait until the last moment to free the original one.
13565      */
13566     if (doc->dict != NULL) {
13567         if (ctxt->dict != NULL)
13568 	    xmlDictFree(ctxt->dict);
13569 	ctxt->dict = doc->dict;
13570     } else
13571         options |= XML_PARSE_NODICT;
13572 
13573     if (doc->encoding != NULL) {
13574         xmlCharEncodingHandlerPtr hdlr;
13575 
13576         if (ctxt->encoding != NULL)
13577 	    xmlFree((xmlChar *) ctxt->encoding);
13578         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13579 
13580         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13581         if (hdlr != NULL) {
13582             xmlSwitchToEncoding(ctxt, hdlr);
13583 	} else {
13584             return(XML_ERR_UNSUPPORTED_ENCODING);
13585         }
13586     }
13587 
13588     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13589     xmlDetectSAX2(ctxt);
13590     ctxt->myDoc = doc;
13591     /* parsing in context, i.e. as within existing content */
13592     ctxt->input_id = 2;
13593     ctxt->instate = XML_PARSER_CONTENT;
13594 
13595     fake = xmlNewDocComment(node->doc, NULL);
13596     if (fake == NULL) {
13597         xmlFreeParserCtxt(ctxt);
13598 	return(XML_ERR_NO_MEMORY);
13599     }
13600     xmlAddChild(node, fake);
13601 
13602     if (node->type == XML_ELEMENT_NODE) {
13603 	nodePush(ctxt, node);
13604 	/*
13605 	 * initialize the SAX2 namespaces stack
13606 	 */
13607 	cur = node;
13608 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13609 	    xmlNsPtr ns = cur->nsDef;
13610 	    const xmlChar *iprefix, *ihref;
13611 
13612 	    while (ns != NULL) {
13613 		if (ctxt->dict) {
13614 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13615 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13616 		} else {
13617 		    iprefix = ns->prefix;
13618 		    ihref = ns->href;
13619 		}
13620 
13621 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13622 		    nsPush(ctxt, iprefix, ihref);
13623 		    nsnr++;
13624 		}
13625 		ns = ns->next;
13626 	    }
13627 	    cur = cur->parent;
13628 	}
13629     }
13630 
13631     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13632 	/*
13633 	 * ID/IDREF registration will be done in xmlValidateElement below
13634 	 */
13635 	ctxt->loadsubset |= XML_SKIP_IDS;
13636     }
13637 
13638 #ifdef LIBXML_HTML_ENABLED
13639     if (doc->type == XML_HTML_DOCUMENT_NODE)
13640         __htmlParseContent(ctxt);
13641     else
13642 #endif
13643 	xmlParseContent(ctxt);
13644 
13645     nsPop(ctxt, nsnr);
13646     if ((RAW == '<') && (NXT(1) == '/')) {
13647 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13648     } else if (RAW != 0) {
13649 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13650     }
13651     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13652 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653 	ctxt->wellFormed = 0;
13654     }
13655 
13656     if (!ctxt->wellFormed) {
13657         if (ctxt->errNo == 0)
13658 	    ret = XML_ERR_INTERNAL_ERROR;
13659 	else
13660 	    ret = (xmlParserErrors)ctxt->errNo;
13661     } else {
13662         ret = XML_ERR_OK;
13663     }
13664 
13665     /*
13666      * Return the newly created nodeset after unlinking it from
13667      * the pseudo sibling.
13668      */
13669 
13670     cur = fake->next;
13671     fake->next = NULL;
13672     node->last = fake;
13673 
13674     if (cur != NULL) {
13675 	cur->prev = NULL;
13676     }
13677 
13678     *lst = cur;
13679 
13680     while (cur != NULL) {
13681 	cur->parent = NULL;
13682 	cur = cur->next;
13683     }
13684 
13685     xmlUnlinkNode(fake);
13686     xmlFreeNode(fake);
13687 
13688 
13689     if (ret != XML_ERR_OK) {
13690         xmlFreeNodeList(*lst);
13691 	*lst = NULL;
13692     }
13693 
13694     if (doc->dict != NULL)
13695         ctxt->dict = NULL;
13696     xmlFreeParserCtxt(ctxt);
13697 
13698     return(ret);
13699 #else /* !SAX2 */
13700     return(XML_ERR_INTERNAL_ERROR);
13701 #endif
13702 }
13703 
13704 #ifdef LIBXML_SAX1_ENABLED
13705 /**
13706  * xmlParseBalancedChunkMemoryRecover:
13707  * @doc:  the document the chunk pertains to (must not be NULL)
13708  * @sax:  the SAX handler block (possibly NULL)
13709  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13710  * @depth:  Used for loop detection, use 0
13711  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13712  * @lst:  the return value for the set of parsed nodes
13713  * @recover: return nodes even if the data is broken (use 0)
13714  *
13715  *
13716  * Parse a well-balanced chunk of an XML document
13717  * called by the parser
13718  * The allowed sequence for the Well Balanced Chunk is the one defined by
13719  * the content production in the XML grammar:
13720  *
13721  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13722  *
13723  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13724  *    the parser error code otherwise
13725  *
13726  * In case recover is set to 1, the nodelist will not be empty even if
13727  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13728  * some extent.
13729  */
13730 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13731 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13732      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13733      int recover) {
13734     xmlParserCtxtPtr ctxt;
13735     xmlDocPtr newDoc;
13736     xmlSAXHandlerPtr oldsax = NULL;
13737     xmlNodePtr content, newRoot;
13738     int size;
13739     int ret = 0;
13740 
13741     if (depth > 40) {
13742 	return(XML_ERR_ENTITY_LOOP);
13743     }
13744 
13745 
13746     if (lst != NULL)
13747         *lst = NULL;
13748     if (string == NULL)
13749         return(-1);
13750 
13751     size = xmlStrlen(string);
13752 
13753     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13754     if (ctxt == NULL) return(-1);
13755     ctxt->userData = ctxt;
13756     if (sax != NULL) {
13757 	oldsax = ctxt->sax;
13758         ctxt->sax = sax;
13759 	if (user_data != NULL)
13760 	    ctxt->userData = user_data;
13761     }
13762     newDoc = xmlNewDoc(BAD_CAST "1.0");
13763     if (newDoc == NULL) {
13764 	xmlFreeParserCtxt(ctxt);
13765 	return(-1);
13766     }
13767     newDoc->properties = XML_DOC_INTERNAL;
13768     if ((doc != NULL) && (doc->dict != NULL)) {
13769         xmlDictFree(ctxt->dict);
13770 	ctxt->dict = doc->dict;
13771 	xmlDictReference(ctxt->dict);
13772 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13773 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13774 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13775 	ctxt->dictNames = 1;
13776     } else {
13777 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13778     }
13779     /* doc == NULL is only supported for historic reasons */
13780     if (doc != NULL) {
13781 	newDoc->intSubset = doc->intSubset;
13782 	newDoc->extSubset = doc->extSubset;
13783     }
13784     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13785     if (newRoot == NULL) {
13786 	if (sax != NULL)
13787 	    ctxt->sax = oldsax;
13788 	xmlFreeParserCtxt(ctxt);
13789 	newDoc->intSubset = NULL;
13790 	newDoc->extSubset = NULL;
13791         xmlFreeDoc(newDoc);
13792 	return(-1);
13793     }
13794     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13795     nodePush(ctxt, newRoot);
13796     /* doc == NULL is only supported for historic reasons */
13797     if (doc == NULL) {
13798 	ctxt->myDoc = newDoc;
13799     } else {
13800 	ctxt->myDoc = newDoc;
13801 	newDoc->children->doc = doc;
13802 	/* Ensure that doc has XML spec namespace */
13803 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13804 	newDoc->oldNs = doc->oldNs;
13805     }
13806     ctxt->instate = XML_PARSER_CONTENT;
13807     ctxt->input_id = 2;
13808     ctxt->depth = depth;
13809 
13810     /*
13811      * Doing validity checking on chunk doesn't make sense
13812      */
13813     ctxt->validate = 0;
13814     ctxt->loadsubset = 0;
13815     xmlDetectSAX2(ctxt);
13816 
13817     if ( doc != NULL ){
13818         content = doc->children;
13819         doc->children = NULL;
13820         xmlParseContent(ctxt);
13821         doc->children = content;
13822     }
13823     else {
13824         xmlParseContent(ctxt);
13825     }
13826     if ((RAW == '<') && (NXT(1) == '/')) {
13827 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13828     } else if (RAW != 0) {
13829 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13830     }
13831     if (ctxt->node != newDoc->children) {
13832 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13833     }
13834 
13835     if (!ctxt->wellFormed) {
13836         if (ctxt->errNo == 0)
13837 	    ret = 1;
13838 	else
13839 	    ret = ctxt->errNo;
13840     } else {
13841       ret = 0;
13842     }
13843 
13844     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13845 	xmlNodePtr cur;
13846 
13847 	/*
13848 	 * Return the newly created nodeset after unlinking it from
13849 	 * they pseudo parent.
13850 	 */
13851 	cur = newDoc->children->children;
13852 	*lst = cur;
13853 	while (cur != NULL) {
13854 	    xmlSetTreeDoc(cur, doc);
13855 	    cur->parent = NULL;
13856 	    cur = cur->next;
13857 	}
13858 	newDoc->children->children = NULL;
13859     }
13860 
13861     if (sax != NULL)
13862 	ctxt->sax = oldsax;
13863     xmlFreeParserCtxt(ctxt);
13864     newDoc->intSubset = NULL;
13865     newDoc->extSubset = NULL;
13866     /* This leaks the namespace list if doc == NULL */
13867     newDoc->oldNs = NULL;
13868     xmlFreeDoc(newDoc);
13869 
13870     return(ret);
13871 }
13872 
13873 /**
13874  * xmlSAXParseEntity:
13875  * @sax:  the SAX handler block
13876  * @filename:  the filename
13877  *
13878  * parse an XML external entity out of context and build a tree.
13879  * It use the given SAX function block to handle the parsing callback.
13880  * If sax is NULL, fallback to the default DOM tree building routines.
13881  *
13882  * [78] extParsedEnt ::= TextDecl? content
13883  *
13884  * This correspond to a "Well Balanced" chunk
13885  *
13886  * Returns the resulting document tree
13887  */
13888 
13889 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13890 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13891     xmlDocPtr ret;
13892     xmlParserCtxtPtr ctxt;
13893 
13894     ctxt = xmlCreateFileParserCtxt(filename);
13895     if (ctxt == NULL) {
13896 	return(NULL);
13897     }
13898     if (sax != NULL) {
13899 	if (ctxt->sax != NULL)
13900 	    xmlFree(ctxt->sax);
13901         ctxt->sax = sax;
13902         ctxt->userData = NULL;
13903     }
13904 
13905     xmlParseExtParsedEnt(ctxt);
13906 
13907     if (ctxt->wellFormed)
13908 	ret = ctxt->myDoc;
13909     else {
13910         ret = NULL;
13911         xmlFreeDoc(ctxt->myDoc);
13912         ctxt->myDoc = NULL;
13913     }
13914     if (sax != NULL)
13915         ctxt->sax = NULL;
13916     xmlFreeParserCtxt(ctxt);
13917 
13918     return(ret);
13919 }
13920 
13921 /**
13922  * xmlParseEntity:
13923  * @filename:  the filename
13924  *
13925  * parse an XML external entity out of context and build a tree.
13926  *
13927  * [78] extParsedEnt ::= TextDecl? content
13928  *
13929  * This correspond to a "Well Balanced" chunk
13930  *
13931  * Returns the resulting document tree
13932  */
13933 
13934 xmlDocPtr
xmlParseEntity(const char * filename)13935 xmlParseEntity(const char *filename) {
13936     return(xmlSAXParseEntity(NULL, filename));
13937 }
13938 #endif /* LIBXML_SAX1_ENABLED */
13939 
13940 /**
13941  * xmlCreateEntityParserCtxtInternal:
13942  * @URL:  the entity URL
13943  * @ID:  the entity PUBLIC ID
13944  * @base:  a possible base for the target URI
13945  * @pctx:  parser context used to set options on new context
13946  *
13947  * Create a parser context for an external entity
13948  * Automatic support for ZLIB/Compress compressed document is provided
13949  * by default if found at compile-time.
13950  *
13951  * Returns the new parser context or NULL
13952  */
13953 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13954 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13955 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13956     xmlParserCtxtPtr ctxt;
13957     xmlParserInputPtr inputStream;
13958     char *directory = NULL;
13959     xmlChar *uri;
13960 
13961     ctxt = xmlNewParserCtxt();
13962     if (ctxt == NULL) {
13963 	return(NULL);
13964     }
13965 
13966     if (pctx != NULL) {
13967         ctxt->options = pctx->options;
13968         ctxt->_private = pctx->_private;
13969 	/*
13970 	 * this is a subparser of pctx, so the input_id should be
13971 	 * incremented to distinguish from main entity
13972 	 */
13973 	ctxt->input_id = pctx->input_id + 1;
13974     }
13975 
13976     /* Don't read from stdin. */
13977     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13978         URL = BAD_CAST "./-";
13979 
13980     uri = xmlBuildURI(URL, base);
13981 
13982     if (uri == NULL) {
13983 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13984 	if (inputStream == NULL) {
13985 	    xmlFreeParserCtxt(ctxt);
13986 	    return(NULL);
13987 	}
13988 
13989 	inputPush(ctxt, inputStream);
13990 
13991 	if ((ctxt->directory == NULL) && (directory == NULL))
13992 	    directory = xmlParserGetDirectory((char *)URL);
13993 	if ((ctxt->directory == NULL) && (directory != NULL))
13994 	    ctxt->directory = directory;
13995     } else {
13996 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13997 	if (inputStream == NULL) {
13998 	    xmlFree(uri);
13999 	    xmlFreeParserCtxt(ctxt);
14000 	    return(NULL);
14001 	}
14002 
14003 	inputPush(ctxt, inputStream);
14004 
14005 	if ((ctxt->directory == NULL) && (directory == NULL))
14006 	    directory = xmlParserGetDirectory((char *)uri);
14007 	if ((ctxt->directory == NULL) && (directory != NULL))
14008 	    ctxt->directory = directory;
14009 	xmlFree(uri);
14010     }
14011     return(ctxt);
14012 }
14013 
14014 /**
14015  * xmlCreateEntityParserCtxt:
14016  * @URL:  the entity URL
14017  * @ID:  the entity PUBLIC ID
14018  * @base:  a possible base for the target URI
14019  *
14020  * Create a parser context for an external entity
14021  * Automatic support for ZLIB/Compress compressed document is provided
14022  * by default if found at compile-time.
14023  *
14024  * Returns the new parser context or NULL
14025  */
14026 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14027 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14028 	                  const xmlChar *base) {
14029     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14030 
14031 }
14032 
14033 /************************************************************************
14034  *									*
14035  *		Front ends when parsing from a file			*
14036  *									*
14037  ************************************************************************/
14038 
14039 /**
14040  * xmlCreateURLParserCtxt:
14041  * @filename:  the filename or URL
14042  * @options:  a combination of xmlParserOption
14043  *
14044  * Create a parser context for a file or URL content.
14045  * Automatic support for ZLIB/Compress compressed document is provided
14046  * by default if found at compile-time and for file accesses
14047  *
14048  * Returns the new parser context or NULL
14049  */
14050 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14051 xmlCreateURLParserCtxt(const char *filename, int options)
14052 {
14053     xmlParserCtxtPtr ctxt;
14054     xmlParserInputPtr inputStream;
14055     char *directory = NULL;
14056 
14057     ctxt = xmlNewParserCtxt();
14058     if (ctxt == NULL) {
14059 	xmlErrMemory(NULL, "cannot allocate parser context");
14060 	return(NULL);
14061     }
14062 
14063     if (options)
14064 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14065     ctxt->linenumbers = 1;
14066 
14067     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14068     if (inputStream == NULL) {
14069 	xmlFreeParserCtxt(ctxt);
14070 	return(NULL);
14071     }
14072 
14073     inputPush(ctxt, inputStream);
14074     if ((ctxt->directory == NULL) && (directory == NULL))
14075         directory = xmlParserGetDirectory(filename);
14076     if ((ctxt->directory == NULL) && (directory != NULL))
14077         ctxt->directory = directory;
14078 
14079     return(ctxt);
14080 }
14081 
14082 /**
14083  * xmlCreateFileParserCtxt:
14084  * @filename:  the filename
14085  *
14086  * Create a parser context for a file content.
14087  * Automatic support for ZLIB/Compress compressed document is provided
14088  * by default if found at compile-time.
14089  *
14090  * Returns the new parser context or NULL
14091  */
14092 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14093 xmlCreateFileParserCtxt(const char *filename)
14094 {
14095     return(xmlCreateURLParserCtxt(filename, 0));
14096 }
14097 
14098 #ifdef LIBXML_SAX1_ENABLED
14099 /**
14100  * xmlSAXParseFileWithData:
14101  * @sax:  the SAX handler block
14102  * @filename:  the filename
14103  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14104  *             documents
14105  * @data:  the userdata
14106  *
14107  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108  * compressed document is provided by default if found at compile-time.
14109  * It use the given SAX function block to handle the parsing callback.
14110  * If sax is NULL, fallback to the default DOM tree building routines.
14111  *
14112  * User data (void *) is stored within the parser context in the
14113  * context's _private member, so it is available nearly everywhere in libxml
14114  *
14115  * Returns the resulting document tree
14116  */
14117 
14118 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14119 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14120                         int recovery, void *data) {
14121     xmlDocPtr ret;
14122     xmlParserCtxtPtr ctxt;
14123 
14124     xmlInitParser();
14125 
14126     ctxt = xmlCreateFileParserCtxt(filename);
14127     if (ctxt == NULL) {
14128 	return(NULL);
14129     }
14130     if (sax != NULL) {
14131 	if (ctxt->sax != NULL)
14132 	    xmlFree(ctxt->sax);
14133         ctxt->sax = sax;
14134     }
14135     xmlDetectSAX2(ctxt);
14136     if (data!=NULL) {
14137 	ctxt->_private = data;
14138     }
14139 
14140     if (ctxt->directory == NULL)
14141         ctxt->directory = xmlParserGetDirectory(filename);
14142 
14143     ctxt->recovery = recovery;
14144 
14145     xmlParseDocument(ctxt);
14146 
14147     if ((ctxt->wellFormed) || recovery) {
14148         ret = ctxt->myDoc;
14149 	if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14150 	    if (ctxt->input->buf->compressed > 0)
14151 		ret->compression = 9;
14152 	    else
14153 		ret->compression = ctxt->input->buf->compressed;
14154 	}
14155     }
14156     else {
14157        ret = NULL;
14158        xmlFreeDoc(ctxt->myDoc);
14159        ctxt->myDoc = NULL;
14160     }
14161     if (sax != NULL)
14162         ctxt->sax = NULL;
14163     xmlFreeParserCtxt(ctxt);
14164 
14165     return(ret);
14166 }
14167 
14168 /**
14169  * xmlSAXParseFile:
14170  * @sax:  the SAX handler block
14171  * @filename:  the filename
14172  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14173  *             documents
14174  *
14175  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176  * compressed document is provided by default if found at compile-time.
14177  * It use the given SAX function block to handle the parsing callback.
14178  * If sax is NULL, fallback to the default DOM tree building routines.
14179  *
14180  * Returns the resulting document tree
14181  */
14182 
14183 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14184 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14185                           int recovery) {
14186     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14187 }
14188 
14189 /**
14190  * xmlRecoverDoc:
14191  * @cur:  a pointer to an array of xmlChar
14192  *
14193  * parse an XML in-memory document and build a tree.
14194  * In the case the document is not Well Formed, a attempt to build a
14195  * tree is tried anyway
14196  *
14197  * Returns the resulting document tree or NULL in case of failure
14198  */
14199 
14200 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14201 xmlRecoverDoc(const xmlChar *cur) {
14202     return(xmlSAXParseDoc(NULL, cur, 1));
14203 }
14204 
14205 /**
14206  * xmlParseFile:
14207  * @filename:  the filename
14208  *
14209  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210  * compressed document is provided by default if found at compile-time.
14211  *
14212  * Returns the resulting document tree if the file was wellformed,
14213  * NULL otherwise.
14214  */
14215 
14216 xmlDocPtr
xmlParseFile(const char * filename)14217 xmlParseFile(const char *filename) {
14218     return(xmlSAXParseFile(NULL, filename, 0));
14219 }
14220 
14221 /**
14222  * xmlRecoverFile:
14223  * @filename:  the filename
14224  *
14225  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226  * compressed document is provided by default if found at compile-time.
14227  * In the case the document is not Well Formed, it attempts to build
14228  * a tree anyway
14229  *
14230  * Returns the resulting document tree or NULL in case of failure
14231  */
14232 
14233 xmlDocPtr
xmlRecoverFile(const char * filename)14234 xmlRecoverFile(const char *filename) {
14235     return(xmlSAXParseFile(NULL, filename, 1));
14236 }
14237 
14238 
14239 /**
14240  * xmlSetupParserForBuffer:
14241  * @ctxt:  an XML parser context
14242  * @buffer:  a xmlChar * buffer
14243  * @filename:  a file name
14244  *
14245  * Setup the parser context to parse a new buffer; Clears any prior
14246  * contents from the parser context. The buffer parameter must not be
14247  * NULL, but the filename parameter can be
14248  */
14249 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14250 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14251                              const char* filename)
14252 {
14253     xmlParserInputPtr input;
14254 
14255     if ((ctxt == NULL) || (buffer == NULL))
14256         return;
14257 
14258     input = xmlNewInputStream(ctxt);
14259     if (input == NULL) {
14260         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14261         xmlClearParserCtxt(ctxt);
14262         return;
14263     }
14264 
14265     xmlClearParserCtxt(ctxt);
14266     if (filename != NULL)
14267         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14268     input->base = buffer;
14269     input->cur = buffer;
14270     input->end = &buffer[xmlStrlen(buffer)];
14271     inputPush(ctxt, input);
14272 }
14273 
14274 /**
14275  * xmlSAXUserParseFile:
14276  * @sax:  a SAX handler
14277  * @user_data:  The user data returned on SAX callbacks
14278  * @filename:  a file name
14279  *
14280  * parse an XML file and call the given SAX handler routines.
14281  * Automatic support for ZLIB/Compress compressed document is provided
14282  *
14283  * Returns 0 in case of success or a error number otherwise
14284  */
14285 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14286 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14287                     const char *filename) {
14288     int ret = 0;
14289     xmlParserCtxtPtr ctxt;
14290 
14291     ctxt = xmlCreateFileParserCtxt(filename);
14292     if (ctxt == NULL) return -1;
14293     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14294 	xmlFree(ctxt->sax);
14295     ctxt->sax = sax;
14296     xmlDetectSAX2(ctxt);
14297 
14298     if (user_data != NULL)
14299 	ctxt->userData = user_data;
14300 
14301     xmlParseDocument(ctxt);
14302 
14303     if (ctxt->wellFormed)
14304 	ret = 0;
14305     else {
14306         if (ctxt->errNo != 0)
14307 	    ret = ctxt->errNo;
14308 	else
14309 	    ret = -1;
14310     }
14311     if (sax != NULL)
14312 	ctxt->sax = NULL;
14313     if (ctxt->myDoc != NULL) {
14314         xmlFreeDoc(ctxt->myDoc);
14315 	ctxt->myDoc = NULL;
14316     }
14317     xmlFreeParserCtxt(ctxt);
14318 
14319     return ret;
14320 }
14321 #endif /* LIBXML_SAX1_ENABLED */
14322 
14323 /************************************************************************
14324  *									*
14325  *		Front ends when parsing from memory			*
14326  *									*
14327  ************************************************************************/
14328 
14329 /**
14330  * xmlCreateMemoryParserCtxt:
14331  * @buffer:  a pointer to a char array
14332  * @size:  the size of the array
14333  *
14334  * Create a parser context for an XML in-memory document.
14335  *
14336  * Returns the new parser context or NULL
14337  */
14338 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14339 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14340     xmlParserCtxtPtr ctxt;
14341     xmlParserInputPtr input;
14342     xmlParserInputBufferPtr buf;
14343 
14344     if (buffer == NULL)
14345 	return(NULL);
14346     if (size <= 0)
14347 	return(NULL);
14348 
14349     ctxt = xmlNewParserCtxt();
14350     if (ctxt == NULL)
14351 	return(NULL);
14352 
14353     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14354     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14355     if (buf == NULL) {
14356 	xmlFreeParserCtxt(ctxt);
14357 	return(NULL);
14358     }
14359 
14360     input = xmlNewInputStream(ctxt);
14361     if (input == NULL) {
14362 	xmlFreeParserInputBuffer(buf);
14363 	xmlFreeParserCtxt(ctxt);
14364 	return(NULL);
14365     }
14366 
14367     input->filename = NULL;
14368     input->buf = buf;
14369     xmlBufResetInput(input->buf->buffer, input);
14370 
14371     inputPush(ctxt, input);
14372     return(ctxt);
14373 }
14374 
14375 #ifdef LIBXML_SAX1_ENABLED
14376 /**
14377  * xmlSAXParseMemoryWithData:
14378  * @sax:  the SAX handler block
14379  * @buffer:  an pointer to a char array
14380  * @size:  the size of the array
14381  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14382  *             documents
14383  * @data:  the userdata
14384  *
14385  * parse an XML in-memory block and use the given SAX function block
14386  * to handle the parsing callback. If sax is NULL, fallback to the default
14387  * DOM tree building routines.
14388  *
14389  * User data (void *) is stored within the parser context in the
14390  * context's _private member, so it is available nearly everywhere in libxml
14391  *
14392  * Returns the resulting document tree
14393  */
14394 
14395 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14396 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14397 	          int size, int recovery, void *data) {
14398     xmlDocPtr ret;
14399     xmlParserCtxtPtr ctxt;
14400 
14401     xmlInitParser();
14402 
14403     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14404     if (ctxt == NULL) return(NULL);
14405     if (sax != NULL) {
14406 	if (ctxt->sax != NULL)
14407 	    xmlFree(ctxt->sax);
14408         ctxt->sax = sax;
14409     }
14410     xmlDetectSAX2(ctxt);
14411     if (data!=NULL) {
14412 	ctxt->_private=data;
14413     }
14414 
14415     ctxt->recovery = recovery;
14416 
14417     xmlParseDocument(ctxt);
14418 
14419     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14420     else {
14421        ret = NULL;
14422        xmlFreeDoc(ctxt->myDoc);
14423        ctxt->myDoc = NULL;
14424     }
14425     if (sax != NULL)
14426 	ctxt->sax = NULL;
14427     xmlFreeParserCtxt(ctxt);
14428 
14429     return(ret);
14430 }
14431 
14432 /**
14433  * xmlSAXParseMemory:
14434  * @sax:  the SAX handler block
14435  * @buffer:  an pointer to a char array
14436  * @size:  the size of the array
14437  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14438  *             documents
14439  *
14440  * parse an XML in-memory block and use the given SAX function block
14441  * to handle the parsing callback. If sax is NULL, fallback to the default
14442  * DOM tree building routines.
14443  *
14444  * Returns the resulting document tree
14445  */
14446 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14447 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14448 	          int size, int recovery) {
14449     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14450 }
14451 
14452 /**
14453  * xmlParseMemory:
14454  * @buffer:  an pointer to a char array
14455  * @size:  the size of the array
14456  *
14457  * parse an XML in-memory block and build a tree.
14458  *
14459  * Returns the resulting document tree
14460  */
14461 
xmlParseMemory(const char * buffer,int size)14462 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14463    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14464 }
14465 
14466 /**
14467  * xmlRecoverMemory:
14468  * @buffer:  an pointer to a char array
14469  * @size:  the size of the array
14470  *
14471  * parse an XML in-memory block and build a tree.
14472  * In the case the document is not Well Formed, an attempt to
14473  * build a tree is tried anyway
14474  *
14475  * Returns the resulting document tree or NULL in case of error
14476  */
14477 
xmlRecoverMemory(const char * buffer,int size)14478 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14479    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14480 }
14481 
14482 /**
14483  * xmlSAXUserParseMemory:
14484  * @sax:  a SAX handler
14485  * @user_data:  The user data returned on SAX callbacks
14486  * @buffer:  an in-memory XML document input
14487  * @size:  the length of the XML document in bytes
14488  *
14489  * A better SAX parsing routine.
14490  * parse an XML in-memory buffer and call the given SAX handler routines.
14491  *
14492  * Returns 0 in case of success or a error number otherwise
14493  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14494 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14495 			  const char *buffer, int size) {
14496     int ret = 0;
14497     xmlParserCtxtPtr ctxt;
14498 
14499     xmlInitParser();
14500 
14501     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14502     if (ctxt == NULL) return -1;
14503     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14504         xmlFree(ctxt->sax);
14505     ctxt->sax = sax;
14506     xmlDetectSAX2(ctxt);
14507 
14508     if (user_data != NULL)
14509 	ctxt->userData = user_data;
14510 
14511     xmlParseDocument(ctxt);
14512 
14513     if (ctxt->wellFormed)
14514 	ret = 0;
14515     else {
14516         if (ctxt->errNo != 0)
14517 	    ret = ctxt->errNo;
14518 	else
14519 	    ret = -1;
14520     }
14521     if (sax != NULL)
14522         ctxt->sax = NULL;
14523     if (ctxt->myDoc != NULL) {
14524         xmlFreeDoc(ctxt->myDoc);
14525 	ctxt->myDoc = NULL;
14526     }
14527     xmlFreeParserCtxt(ctxt);
14528 
14529     return ret;
14530 }
14531 #endif /* LIBXML_SAX1_ENABLED */
14532 
14533 /**
14534  * xmlCreateDocParserCtxt:
14535  * @cur:  a pointer to an array of xmlChar
14536  *
14537  * Creates a parser context for an XML in-memory document.
14538  *
14539  * Returns the new parser context or NULL
14540  */
14541 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14542 xmlCreateDocParserCtxt(const xmlChar *cur) {
14543     int len;
14544 
14545     if (cur == NULL)
14546 	return(NULL);
14547     len = xmlStrlen(cur);
14548     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14549 }
14550 
14551 #ifdef LIBXML_SAX1_ENABLED
14552 /**
14553  * xmlSAXParseDoc:
14554  * @sax:  the SAX handler block
14555  * @cur:  a pointer to an array of xmlChar
14556  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14557  *             documents
14558  *
14559  * parse an XML in-memory document and build a tree.
14560  * It use the given SAX function block to handle the parsing callback.
14561  * If sax is NULL, fallback to the default DOM tree building routines.
14562  *
14563  * Returns the resulting document tree
14564  */
14565 
14566 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14567 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14568     xmlDocPtr ret;
14569     xmlParserCtxtPtr ctxt;
14570     xmlSAXHandlerPtr oldsax = NULL;
14571 
14572     if (cur == NULL) return(NULL);
14573 
14574 
14575     ctxt = xmlCreateDocParserCtxt(cur);
14576     if (ctxt == NULL) return(NULL);
14577     if (sax != NULL) {
14578         oldsax = ctxt->sax;
14579         ctxt->sax = sax;
14580         ctxt->userData = NULL;
14581     }
14582     xmlDetectSAX2(ctxt);
14583 
14584     xmlParseDocument(ctxt);
14585     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14586     else {
14587        ret = NULL;
14588        xmlFreeDoc(ctxt->myDoc);
14589        ctxt->myDoc = NULL;
14590     }
14591     if (sax != NULL)
14592 	ctxt->sax = oldsax;
14593     xmlFreeParserCtxt(ctxt);
14594 
14595     return(ret);
14596 }
14597 
14598 /**
14599  * xmlParseDoc:
14600  * @cur:  a pointer to an array of xmlChar
14601  *
14602  * parse an XML in-memory document and build a tree.
14603  *
14604  * Returns the resulting document tree
14605  */
14606 
14607 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14608 xmlParseDoc(const xmlChar *cur) {
14609     return(xmlSAXParseDoc(NULL, cur, 0));
14610 }
14611 #endif /* LIBXML_SAX1_ENABLED */
14612 
14613 #ifdef LIBXML_LEGACY_ENABLED
14614 /************************************************************************
14615  *									*
14616  *	Specific function to keep track of entities references		*
14617  *	and used by the XSLT debugger					*
14618  *									*
14619  ************************************************************************/
14620 
14621 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14622 
14623 /**
14624  * xmlAddEntityReference:
14625  * @ent : A valid entity
14626  * @firstNode : A valid first node for children of entity
14627  * @lastNode : A valid last node of children entity
14628  *
14629  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14630  */
14631 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14632 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14633                       xmlNodePtr lastNode)
14634 {
14635     if (xmlEntityRefFunc != NULL) {
14636         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14637     }
14638 }
14639 
14640 
14641 /**
14642  * xmlSetEntityReferenceFunc:
14643  * @func: A valid function
14644  *
14645  * Set the function to call call back when a xml reference has been made
14646  */
14647 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14648 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14649 {
14650     xmlEntityRefFunc = func;
14651 }
14652 #endif /* LIBXML_LEGACY_ENABLED */
14653 
14654 /************************************************************************
14655  *									*
14656  *				Miscellaneous				*
14657  *									*
14658  ************************************************************************/
14659 
14660 #ifdef LIBXML_XPATH_ENABLED
14661 #include <libxml/xpath.h>
14662 #endif
14663 
14664 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14665 static int xmlParserInitialized = 0;
14666 
14667 /**
14668  * xmlInitParser:
14669  *
14670  * Initialization function for the XML parser.
14671  * This is not reentrant. Call once before processing in case of
14672  * use in multithreaded programs.
14673  */
14674 
14675 void
xmlInitParser(void)14676 xmlInitParser(void) {
14677     if (xmlParserInitialized != 0)
14678 	return;
14679 
14680 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14681     if (xmlFree == free)
14682         atexit(xmlCleanupParser);
14683 #endif
14684 
14685 #ifdef LIBXML_THREAD_ENABLED
14686     __xmlGlobalInitMutexLock();
14687     if (xmlParserInitialized == 0) {
14688 #endif
14689 	xmlInitThreads();
14690 	xmlInitGlobals();
14691 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14692 	    (xmlGenericError == NULL))
14693 	    initGenericErrorDefaultFunc(NULL);
14694 	xmlInitMemory();
14695         xmlInitializeDict();
14696 	xmlInitCharEncodingHandlers();
14697 	xmlDefaultSAXHandlerInit();
14698 	xmlRegisterDefaultInputCallbacks();
14699 #ifdef LIBXML_OUTPUT_ENABLED
14700 	xmlRegisterDefaultOutputCallbacks();
14701 #endif /* LIBXML_OUTPUT_ENABLED */
14702 #ifdef LIBXML_HTML_ENABLED
14703 	htmlInitAutoClose();
14704 	htmlDefaultSAXHandlerInit();
14705 #endif
14706 #ifdef LIBXML_XPATH_ENABLED
14707 	xmlXPathInit();
14708 #endif
14709 	xmlParserInitialized = 1;
14710 #ifdef LIBXML_THREAD_ENABLED
14711     }
14712     __xmlGlobalInitMutexUnlock();
14713 #endif
14714 }
14715 
14716 /**
14717  * xmlCleanupParser:
14718  *
14719  * This function name is somewhat misleading. It does not clean up
14720  * parser state, it cleans up memory allocated by the library itself.
14721  * It is a cleanup function for the XML library. It tries to reclaim all
14722  * related global memory allocated for the library processing.
14723  * It doesn't deallocate any document related memory. One should
14724  * call xmlCleanupParser() only when the process has finished using
14725  * the library and all XML/HTML documents built with it.
14726  * See also xmlInitParser() which has the opposite function of preparing
14727  * the library for operations.
14728  *
14729  * WARNING: if your application is multithreaded or has plugin support
14730  *          calling this may crash the application if another thread or
14731  *          a plugin is still using libxml2. It's sometimes very hard to
14732  *          guess if libxml2 is in use in the application, some libraries
14733  *          or plugins may use it without notice. In case of doubt abstain
14734  *          from calling this function or do it just before calling exit()
14735  *          to avoid leak reports from valgrind !
14736  */
14737 
14738 void
xmlCleanupParser(void)14739 xmlCleanupParser(void) {
14740     if (!xmlParserInitialized)
14741 	return;
14742 
14743     xmlCleanupCharEncodingHandlers();
14744 #ifdef LIBXML_CATALOG_ENABLED
14745     xmlCatalogCleanup();
14746 #endif
14747     xmlDictCleanup();
14748     xmlCleanupInputCallbacks();
14749 #ifdef LIBXML_OUTPUT_ENABLED
14750     xmlCleanupOutputCallbacks();
14751 #endif
14752 #ifdef LIBXML_SCHEMAS_ENABLED
14753     xmlSchemaCleanupTypes();
14754     xmlRelaxNGCleanupTypes();
14755 #endif
14756     xmlCleanupGlobals();
14757     xmlCleanupThreads(); /* must be last if called not from the main thread */
14758     xmlCleanupMemory();
14759     xmlParserInitialized = 0;
14760 }
14761 
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook: releases the library's global memory, but only while
 * the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14775 
14776 /************************************************************************
14777  *									*
14778  *	New set (2.6.0) of simpler and more flexible APIs		*
14779  *									*
14780  ************************************************************************/
14781 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; a NULL dict means the string is never dictionary-owned.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe inside an unbraced if/else). Use it as a
 * statement terminated by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14793 
14794 /**
14795  * xmlCtxtReset:
14796  * @ctxt: an XML parser context
14797  *
14798  * Reset a parser context
14799  */
14800 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14801 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14802 {
14803     xmlParserInputPtr input;
14804     xmlDictPtr dict;
14805 
14806     if (ctxt == NULL)
14807         return;
14808 
14809     dict = ctxt->dict;
14810 
14811     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812         xmlFreeInputStream(input);
14813     }
14814     ctxt->inputNr = 0;
14815     ctxt->input = NULL;
14816 
14817     ctxt->spaceNr = 0;
14818     if (ctxt->spaceTab != NULL) {
14819 	ctxt->spaceTab[0] = -1;
14820 	ctxt->space = &ctxt->spaceTab[0];
14821     } else {
14822         ctxt->space = NULL;
14823     }
14824 
14825 
14826     ctxt->nodeNr = 0;
14827     ctxt->node = NULL;
14828 
14829     ctxt->nameNr = 0;
14830     ctxt->name = NULL;
14831 
14832     ctxt->nsNr = 0;
14833 
14834     DICT_FREE(ctxt->version);
14835     ctxt->version = NULL;
14836     DICT_FREE(ctxt->encoding);
14837     ctxt->encoding = NULL;
14838     DICT_FREE(ctxt->directory);
14839     ctxt->directory = NULL;
14840     DICT_FREE(ctxt->extSubURI);
14841     ctxt->extSubURI = NULL;
14842     DICT_FREE(ctxt->extSubSystem);
14843     ctxt->extSubSystem = NULL;
14844     if (ctxt->myDoc != NULL)
14845         xmlFreeDoc(ctxt->myDoc);
14846     ctxt->myDoc = NULL;
14847 
14848     ctxt->standalone = -1;
14849     ctxt->hasExternalSubset = 0;
14850     ctxt->hasPErefs = 0;
14851     ctxt->html = 0;
14852     ctxt->external = 0;
14853     ctxt->instate = XML_PARSER_START;
14854     ctxt->token = 0;
14855 
14856     ctxt->wellFormed = 1;
14857     ctxt->nsWellFormed = 1;
14858     ctxt->disableSAX = 0;
14859     ctxt->valid = 1;
14860 #if 0
14861     ctxt->vctxt.userData = ctxt;
14862     ctxt->vctxt.error = xmlParserValidityError;
14863     ctxt->vctxt.warning = xmlParserValidityWarning;
14864 #endif
14865     ctxt->record_info = 0;
14866     ctxt->checkIndex = 0;
14867     ctxt->inSubset = 0;
14868     ctxt->errNo = XML_ERR_OK;
14869     ctxt->depth = 0;
14870     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871     ctxt->catalogs = NULL;
14872     ctxt->nbentities = 0;
14873     ctxt->sizeentities = 0;
14874     ctxt->sizeentcopy = 0;
14875     xmlInitNodeInfoSeq(&ctxt->node_seq);
14876 
14877     if (ctxt->attsDefault != NULL) {
14878         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14879         ctxt->attsDefault = NULL;
14880     }
14881     if (ctxt->attsSpecial != NULL) {
14882         xmlHashFree(ctxt->attsSpecial, NULL);
14883         ctxt->attsSpecial = NULL;
14884     }
14885 
14886 #ifdef LIBXML_CATALOG_ENABLED
14887     if (ctxt->catalogs != NULL)
14888 	xmlCatalogFreeLocal(ctxt->catalogs);
14889 #endif
14890     if (ctxt->lastError.code != XML_ERR_OK)
14891         xmlResetError(&ctxt->lastError);
14892 }
14893 
14894 /**
14895  * xmlCtxtResetPush:
14896  * @ctxt: an XML parser context
14897  * @chunk:  a pointer to an array of chars
14898  * @size:  number of chars in the array
14899  * @filename:  an optional file name or URI
14900  * @encoding:  the document encoding, or NULL
14901  *
14902  * Reset a push parser context
14903  *
14904  * Returns 0 in case of success and 1 in case of error
14905  */
14906 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14907 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908                  int size, const char *filename, const char *encoding)
14909 {
14910     xmlParserInputPtr inputStream;
14911     xmlParserInputBufferPtr buf;
14912     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913 
14914     if (ctxt == NULL)
14915         return(1);
14916 
14917     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919 
14920     buf = xmlAllocParserInputBuffer(enc);
14921     if (buf == NULL)
14922         return(1);
14923 
14924     if (ctxt == NULL) {
14925         xmlFreeParserInputBuffer(buf);
14926         return(1);
14927     }
14928 
14929     xmlCtxtReset(ctxt);
14930 
14931     if (filename == NULL) {
14932         ctxt->directory = NULL;
14933     } else {
14934         ctxt->directory = xmlParserGetDirectory(filename);
14935     }
14936 
14937     inputStream = xmlNewInputStream(ctxt);
14938     if (inputStream == NULL) {
14939         xmlFreeParserInputBuffer(buf);
14940         return(1);
14941     }
14942 
14943     if (filename == NULL)
14944         inputStream->filename = NULL;
14945     else
14946         inputStream->filename = (char *)
14947             xmlCanonicPath((const xmlChar *) filename);
14948     inputStream->buf = buf;
14949     xmlBufResetInput(buf->buffer, inputStream);
14950 
14951     inputPush(ctxt, inputStream);
14952 
14953     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14954         (ctxt->input->buf != NULL)) {
14955 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14956         size_t cur = ctxt->input->cur - ctxt->input->base;
14957 
14958         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14959 
14960         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14961 #ifdef DEBUG_PUSH
14962         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14963 #endif
14964     }
14965 
14966     if (encoding != NULL) {
14967         xmlCharEncodingHandlerPtr hdlr;
14968 
14969         if (ctxt->encoding != NULL)
14970 	    xmlFree((xmlChar *) ctxt->encoding);
14971         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14972 
14973         hdlr = xmlFindCharEncodingHandler(encoding);
14974         if (hdlr != NULL) {
14975             xmlSwitchToEncoding(ctxt, hdlr);
14976 	} else {
14977 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14978 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14979         }
14980     } else if (enc != XML_CHAR_ENCODING_NONE) {
14981         xmlSwitchEncoding(ctxt, enc);
14982     }
14983 
14984     return(0);
14985 }
14986 
14987 
14988 /**
14989  * xmlCtxtUseOptionsInternal:
14990  * @ctxt: an XML parser context
14991  * @options:  a combination of xmlParserOption
14992  * @encoding:  the user provided encoding to use
14993  *
14994  * Applies the options to the parser context
14995  *
14996  * Returns 0 in case of success, the set of unknown or unimplemented options
14997  *         in case of error.
14998  */
14999 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15000 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15001 {
15002     if (ctxt == NULL)
15003         return(-1);
15004     if (encoding != NULL) {
15005         if (ctxt->encoding != NULL)
15006 	    xmlFree((xmlChar *) ctxt->encoding);
15007         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15008     }
15009     if (options & XML_PARSE_RECOVER) {
15010         ctxt->recovery = 1;
15011         options -= XML_PARSE_RECOVER;
15012 	ctxt->options |= XML_PARSE_RECOVER;
15013     } else
15014         ctxt->recovery = 0;
15015     if (options & XML_PARSE_DTDLOAD) {
15016         ctxt->loadsubset = XML_DETECT_IDS;
15017         options -= XML_PARSE_DTDLOAD;
15018 	ctxt->options |= XML_PARSE_DTDLOAD;
15019     } else
15020         ctxt->loadsubset = 0;
15021     if (options & XML_PARSE_DTDATTR) {
15022         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15023         options -= XML_PARSE_DTDATTR;
15024 	ctxt->options |= XML_PARSE_DTDATTR;
15025     }
15026     if (options & XML_PARSE_NOENT) {
15027         ctxt->replaceEntities = 1;
15028         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15029         options -= XML_PARSE_NOENT;
15030 	ctxt->options |= XML_PARSE_NOENT;
15031     } else
15032         ctxt->replaceEntities = 0;
15033     if (options & XML_PARSE_PEDANTIC) {
15034         ctxt->pedantic = 1;
15035         options -= XML_PARSE_PEDANTIC;
15036 	ctxt->options |= XML_PARSE_PEDANTIC;
15037     } else
15038         ctxt->pedantic = 0;
15039     if (options & XML_PARSE_NOBLANKS) {
15040         ctxt->keepBlanks = 0;
15041         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15042         options -= XML_PARSE_NOBLANKS;
15043 	ctxt->options |= XML_PARSE_NOBLANKS;
15044     } else
15045         ctxt->keepBlanks = 1;
15046     if (options & XML_PARSE_DTDVALID) {
15047         ctxt->validate = 1;
15048         if (options & XML_PARSE_NOWARNING)
15049             ctxt->vctxt.warning = NULL;
15050         if (options & XML_PARSE_NOERROR)
15051             ctxt->vctxt.error = NULL;
15052         options -= XML_PARSE_DTDVALID;
15053 	ctxt->options |= XML_PARSE_DTDVALID;
15054     } else
15055         ctxt->validate = 0;
15056     if (options & XML_PARSE_NOWARNING) {
15057         ctxt->sax->warning = NULL;
15058         options -= XML_PARSE_NOWARNING;
15059     }
15060     if (options & XML_PARSE_NOERROR) {
15061         ctxt->sax->error = NULL;
15062         ctxt->sax->fatalError = NULL;
15063         options -= XML_PARSE_NOERROR;
15064     }
15065 #ifdef LIBXML_SAX1_ENABLED
15066     if (options & XML_PARSE_SAX1) {
15067         ctxt->sax->startElement = xmlSAX2StartElement;
15068         ctxt->sax->endElement = xmlSAX2EndElement;
15069         ctxt->sax->startElementNs = NULL;
15070         ctxt->sax->endElementNs = NULL;
15071         ctxt->sax->initialized = 1;
15072         options -= XML_PARSE_SAX1;
15073 	ctxt->options |= XML_PARSE_SAX1;
15074     }
15075 #endif /* LIBXML_SAX1_ENABLED */
15076     if (options & XML_PARSE_NODICT) {
15077         ctxt->dictNames = 0;
15078         options -= XML_PARSE_NODICT;
15079 	ctxt->options |= XML_PARSE_NODICT;
15080     } else {
15081         ctxt->dictNames = 1;
15082     }
15083     if (options & XML_PARSE_NOCDATA) {
15084         ctxt->sax->cdataBlock = NULL;
15085         options -= XML_PARSE_NOCDATA;
15086 	ctxt->options |= XML_PARSE_NOCDATA;
15087     }
15088     if (options & XML_PARSE_NSCLEAN) {
15089 	ctxt->options |= XML_PARSE_NSCLEAN;
15090         options -= XML_PARSE_NSCLEAN;
15091     }
15092     if (options & XML_PARSE_NONET) {
15093 	ctxt->options |= XML_PARSE_NONET;
15094         options -= XML_PARSE_NONET;
15095     }
15096     if (options & XML_PARSE_COMPACT) {
15097 	ctxt->options |= XML_PARSE_COMPACT;
15098         options -= XML_PARSE_COMPACT;
15099     }
15100     if (options & XML_PARSE_OLD10) {
15101 	ctxt->options |= XML_PARSE_OLD10;
15102         options -= XML_PARSE_OLD10;
15103     }
15104     if (options & XML_PARSE_NOBASEFIX) {
15105 	ctxt->options |= XML_PARSE_NOBASEFIX;
15106         options -= XML_PARSE_NOBASEFIX;
15107     }
15108     if (options & XML_PARSE_HUGE) {
15109 	ctxt->options |= XML_PARSE_HUGE;
15110         options -= XML_PARSE_HUGE;
15111         if (ctxt->dict != NULL)
15112             xmlDictSetLimit(ctxt->dict, 0);
15113     }
15114     if (options & XML_PARSE_OLDSAX) {
15115 	ctxt->options |= XML_PARSE_OLDSAX;
15116         options -= XML_PARSE_OLDSAX;
15117     }
15118     if (options & XML_PARSE_IGNORE_ENC) {
15119 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15120         options -= XML_PARSE_IGNORE_ENC;
15121     }
15122     if (options & XML_PARSE_BIG_LINES) {
15123 	ctxt->options |= XML_PARSE_BIG_LINES;
15124         options -= XML_PARSE_BIG_LINES;
15125     }
15126     ctxt->linenumbers = 1;
15127     return (options);
15128 }
15129 
15130 /**
15131  * xmlCtxtUseOptions:
15132  * @ctxt: an XML parser context
15133  * @options:  a combination of xmlParserOption
15134  *
15135  * Applies the options to the parser context
15136  *
15137  * Returns 0 in case of success, the set of unknown or unimplemented options
15138  *         in case of error.
15139  */
15140 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15141 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15142 {
15143    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15144 }
15145 
15146 /**
15147  * xmlDoRead:
15148  * @ctxt:  an XML parser context
15149  * @URL:  the base URL to use for the document
15150  * @encoding:  the document encoding, or NULL
15151  * @options:  a combination of xmlParserOption
15152  * @reuse:  keep the context for reuse
15153  *
15154  * Common front-end for the xmlRead functions
15155  *
15156  * Returns the resulting document tree or NULL
15157  */
15158 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15159 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15160           int options, int reuse)
15161 {
15162     xmlDocPtr ret;
15163 
15164     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15165     if (encoding != NULL) {
15166         xmlCharEncodingHandlerPtr hdlr;
15167 
15168 	hdlr = xmlFindCharEncodingHandler(encoding);
15169 	if (hdlr != NULL)
15170 	    xmlSwitchToEncoding(ctxt, hdlr);
15171     }
15172     if ((URL != NULL) && (ctxt->input != NULL) &&
15173         (ctxt->input->filename == NULL))
15174         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15175     xmlParseDocument(ctxt);
15176     if ((ctxt->wellFormed) || ctxt->recovery)
15177         ret = ctxt->myDoc;
15178     else {
15179         ret = NULL;
15180 	if (ctxt->myDoc != NULL) {
15181 	    xmlFreeDoc(ctxt->myDoc);
15182 	}
15183     }
15184     ctxt->myDoc = NULL;
15185     if (!reuse) {
15186 	xmlFreeParserCtxt(ctxt);
15187     }
15188 
15189     return (ret);
15190 }
15191 
15192 /**
15193  * xmlReadDoc:
15194  * @cur:  a pointer to a zero terminated string
15195  * @URL:  the base URL to use for the document
15196  * @encoding:  the document encoding, or NULL
15197  * @options:  a combination of xmlParserOption
15198  *
15199  * parse an XML in-memory document and build a tree.
15200  *
15201  * Returns the resulting document tree
15202  */
15203 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15204 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15205 {
15206     xmlParserCtxtPtr ctxt;
15207 
15208     if (cur == NULL)
15209         return (NULL);
15210     xmlInitParser();
15211 
15212     ctxt = xmlCreateDocParserCtxt(cur);
15213     if (ctxt == NULL)
15214         return (NULL);
15215     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15216 }
15217 
15218 /**
15219  * xmlReadFile:
15220  * @filename:  a file or URL
15221  * @encoding:  the document encoding, or NULL
15222  * @options:  a combination of xmlParserOption
15223  *
15224  * parse an XML file from the filesystem or the network.
15225  *
15226  * Returns the resulting document tree
15227  */
15228 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15229 xmlReadFile(const char *filename, const char *encoding, int options)
15230 {
15231     xmlParserCtxtPtr ctxt;
15232 
15233     xmlInitParser();
15234     ctxt = xmlCreateURLParserCtxt(filename, options);
15235     if (ctxt == NULL)
15236         return (NULL);
15237     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15238 }
15239 
15240 /**
15241  * xmlReadMemory:
15242  * @buffer:  a pointer to a char array
15243  * @size:  the size of the array
15244  * @URL:  the base URL to use for the document
15245  * @encoding:  the document encoding, or NULL
15246  * @options:  a combination of xmlParserOption
15247  *
15248  * parse an XML in-memory document and build a tree.
15249  *
15250  * Returns the resulting document tree
15251  */
15252 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15253 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15254 {
15255     xmlParserCtxtPtr ctxt;
15256 
15257     xmlInitParser();
15258     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15259     if (ctxt == NULL)
15260         return (NULL);
15261     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15262 }
15263 
15264 /**
15265  * xmlReadFd:
15266  * @fd:  an open file descriptor
15267  * @URL:  the base URL to use for the document
15268  * @encoding:  the document encoding, or NULL
15269  * @options:  a combination of xmlParserOption
15270  *
15271  * parse an XML from a file descriptor and build a tree.
15272  * NOTE that the file descriptor will not be closed when the
15273  *      reader is closed or reset.
15274  *
15275  * Returns the resulting document tree
15276  */
15277 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15278 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15279 {
15280     xmlParserCtxtPtr ctxt;
15281     xmlParserInputBufferPtr input;
15282     xmlParserInputPtr stream;
15283 
15284     if (fd < 0)
15285         return (NULL);
15286     xmlInitParser();
15287 
15288     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15289     if (input == NULL)
15290         return (NULL);
15291     input->closecallback = NULL;
15292     ctxt = xmlNewParserCtxt();
15293     if (ctxt == NULL) {
15294         xmlFreeParserInputBuffer(input);
15295         return (NULL);
15296     }
15297     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15298     if (stream == NULL) {
15299         xmlFreeParserInputBuffer(input);
15300 	xmlFreeParserCtxt(ctxt);
15301         return (NULL);
15302     }
15303     inputPush(ctxt, stream);
15304     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305 }
15306 
15307 /**
15308  * xmlReadIO:
15309  * @ioread:  an I/O read function
15310  * @ioclose:  an I/O close function
15311  * @ioctx:  an I/O handler
15312  * @URL:  the base URL to use for the document
15313  * @encoding:  the document encoding, or NULL
15314  * @options:  a combination of xmlParserOption
15315  *
15316  * parse an XML document from I/O functions and source and build a tree.
15317  *
15318  * Returns the resulting document tree
15319  */
15320 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15321 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15322           void *ioctx, const char *URL, const char *encoding, int options)
15323 {
15324     xmlParserCtxtPtr ctxt;
15325     xmlParserInputBufferPtr input;
15326     xmlParserInputPtr stream;
15327 
15328     if (ioread == NULL)
15329         return (NULL);
15330     xmlInitParser();
15331 
15332     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15333                                          XML_CHAR_ENCODING_NONE);
15334     if (input == NULL) {
15335         if (ioclose != NULL)
15336             ioclose(ioctx);
15337         return (NULL);
15338     }
15339     ctxt = xmlNewParserCtxt();
15340     if (ctxt == NULL) {
15341         xmlFreeParserInputBuffer(input);
15342         return (NULL);
15343     }
15344     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15345     if (stream == NULL) {
15346         xmlFreeParserInputBuffer(input);
15347 	xmlFreeParserCtxt(ctxt);
15348         return (NULL);
15349     }
15350     inputPush(ctxt, stream);
15351     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15352 }
15353 
15354 /**
15355  * xmlCtxtReadDoc:
15356  * @ctxt:  an XML parser context
15357  * @cur:  a pointer to a zero terminated string
15358  * @URL:  the base URL to use for the document
15359  * @encoding:  the document encoding, or NULL
15360  * @options:  a combination of xmlParserOption
15361  *
15362  * parse an XML in-memory document and build a tree.
15363  * This reuses the existing @ctxt parser context
15364  *
15365  * Returns the resulting document tree
15366  */
15367 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15368 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15369                const char *URL, const char *encoding, int options)
15370 {
15371     if (cur == NULL)
15372         return (NULL);
15373     return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15374                               encoding, options));
15375 }
15376 
15377 /**
15378  * xmlCtxtReadFile:
15379  * @ctxt:  an XML parser context
15380  * @filename:  a file or URL
15381  * @encoding:  the document encoding, or NULL
15382  * @options:  a combination of xmlParserOption
15383  *
15384  * parse an XML file from the filesystem or the network.
15385  * This reuses the existing @ctxt parser context
15386  *
15387  * Returns the resulting document tree
15388  */
15389 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15390 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15391                 const char *encoding, int options)
15392 {
15393     xmlParserInputPtr stream;
15394 
15395     if (filename == NULL)
15396         return (NULL);
15397     if (ctxt == NULL)
15398         return (NULL);
15399     xmlInitParser();
15400 
15401     xmlCtxtReset(ctxt);
15402 
15403     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15404     if (stream == NULL) {
15405         return (NULL);
15406     }
15407     inputPush(ctxt, stream);
15408     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15409 }
15410 
15411 /**
15412  * xmlCtxtReadMemory:
15413  * @ctxt:  an XML parser context
15414  * @buffer:  a pointer to a char array
15415  * @size:  the size of the array
15416  * @URL:  the base URL to use for the document
15417  * @encoding:  the document encoding, or NULL
15418  * @options:  a combination of xmlParserOption
15419  *
15420  * parse an XML in-memory document and build a tree.
15421  * This reuses the existing @ctxt parser context
15422  *
15423  * Returns the resulting document tree
15424  */
15425 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15426 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15427                   const char *URL, const char *encoding, int options)
15428 {
15429     xmlParserInputBufferPtr input;
15430     xmlParserInputPtr stream;
15431 
15432     if (ctxt == NULL)
15433         return (NULL);
15434     if (buffer == NULL)
15435         return (NULL);
15436     xmlInitParser();
15437 
15438     xmlCtxtReset(ctxt);
15439 
15440     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15441     if (input == NULL) {
15442 	return(NULL);
15443     }
15444 
15445     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15446     if (stream == NULL) {
15447 	xmlFreeParserInputBuffer(input);
15448 	return(NULL);
15449     }
15450 
15451     inputPush(ctxt, stream);
15452     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15453 }
15454 
15455 /**
15456  * xmlCtxtReadFd:
15457  * @ctxt:  an XML parser context
15458  * @fd:  an open file descriptor
15459  * @URL:  the base URL to use for the document
15460  * @encoding:  the document encoding, or NULL
15461  * @options:  a combination of xmlParserOption
15462  *
15463  * parse an XML from a file descriptor and build a tree.
15464  * This reuses the existing @ctxt parser context
15465  * NOTE that the file descriptor will not be closed when the
15466  *      reader is closed or reset.
15467  *
15468  * Returns the resulting document tree
15469  */
15470 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15471 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15472               const char *URL, const char *encoding, int options)
15473 {
15474     xmlParserInputBufferPtr input;
15475     xmlParserInputPtr stream;
15476 
15477     if (fd < 0)
15478         return (NULL);
15479     if (ctxt == NULL)
15480         return (NULL);
15481     xmlInitParser();
15482 
15483     xmlCtxtReset(ctxt);
15484 
15485 
15486     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15487     if (input == NULL)
15488         return (NULL);
15489     input->closecallback = NULL;
15490     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15491     if (stream == NULL) {
15492         xmlFreeParserInputBuffer(input);
15493         return (NULL);
15494     }
15495     inputPush(ctxt, stream);
15496     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15497 }
15498 
15499 /**
15500  * xmlCtxtReadIO:
15501  * @ctxt:  an XML parser context
15502  * @ioread:  an I/O read function
15503  * @ioclose:  an I/O close function
15504  * @ioctx:  an I/O handler
15505  * @URL:  the base URL to use for the document
15506  * @encoding:  the document encoding, or NULL
15507  * @options:  a combination of xmlParserOption
15508  *
15509  * parse an XML document from I/O functions and source and build a tree.
15510  * This reuses the existing @ctxt parser context
15511  *
15512  * Returns the resulting document tree
15513  */
15514 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15515 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15516               xmlInputCloseCallback ioclose, void *ioctx,
15517 	      const char *URL,
15518               const char *encoding, int options)
15519 {
15520     xmlParserInputBufferPtr input;
15521     xmlParserInputPtr stream;
15522 
15523     if (ioread == NULL)
15524         return (NULL);
15525     if (ctxt == NULL)
15526         return (NULL);
15527     xmlInitParser();
15528 
15529     xmlCtxtReset(ctxt);
15530 
15531     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15532                                          XML_CHAR_ENCODING_NONE);
15533     if (input == NULL) {
15534         if (ioclose != NULL)
15535             ioclose(ioctx);
15536         return (NULL);
15537     }
15538     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15539     if (stream == NULL) {
15540         xmlFreeParserInputBuffer(input);
15541         return (NULL);
15542     }
15543     inputPush(ctxt, stream);
15544     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15545 }
15546 
15547