1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
/*
 * XML_DIR_SEP: directory separator character. It is a backslash when
 * _WIN32 is defined and __CYGWIN__ is not, and a forward slash otherwise.
 */
#if defined(_WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86 
87 #include "buf.h"
88 #include "enc.h"
89 
/*
 * Record attached to a start tag.
 * NOTE(review): the code that fills and reads this structure is not in the
 * visible part of the file; the field notes below are inferred from the
 * field names only and must be confirmed against the users of the struct.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the namespace prefix of the tag */
    const xmlChar *URI;     /* presumably the namespace URI of the tag */
    int line;               /* presumably the input line of the start tag */
    int nsNr;               /* presumably a namespace count - TODO confirm */
};
96 
/*
 * Forward declarations of file-local (static) routines so they can be
 * called before their definitions. xmlFatalErr is called from
 * xmlParserEntityCheck ahead of its definition; the other routines are
 * presumably defined further down in the file - not visible from here.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111 
112 /************************************************************************
113  *									*
114  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
115  *									*
116  ************************************************************************/
117 
/*
 * XML_PARSER_BIG_ENTITY: xmlParserEntityCheck() skips its ratio test when
 *    the entity size it is given is below this value.
 * XML_PARSER_LOT_ENTITY: NOTE(review) not referenced in the visible part
 *    of the file; meaning to be confirmed against its users.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
128 
129 /*
130  * xmlParserEntityCheck
131  *
132  * Function to check non-linear entity expansion behaviour
133  * This is here to detect and stop exponential linear entity expansion
134  * This is not a limitation of the parser but a safety
135  * boundary feature. It can be disabled with the XML_PARSE_HUGE
136  * parser option.
137  */
138 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)139 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
140                      xmlEntityPtr ent, size_t replacement)
141 {
142     size_t consumed = 0;
143     int i;
144 
145     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146         return (0);
147     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148         return (1);
149 
150     /*
151      * This may look absurd but is needed to detect
152      * entities problems
153      */
154     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
155 	(ent->content != NULL) && (ent->checked == 0) &&
156 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
157 	unsigned long oldnbent = ctxt->nbentities, diff;
158 	xmlChar *rep;
159 
160 	ent->checked = 1;
161 
162         ++ctxt->depth;
163 	rep = xmlStringDecodeEntities(ctxt, ent->content,
164 				  XML_SUBSTITUTE_REF, 0, 0, 0);
165         --ctxt->depth;
166 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
167 	    ent->content[0] = 0;
168 	}
169 
170         diff = ctxt->nbentities - oldnbent + 1;
171         if (diff > INT_MAX / 2)
172             diff = INT_MAX / 2;
173 	ent->checked = diff * 2;
174 	if (rep != NULL) {
175 	    if (xmlStrchr(rep, '<'))
176 		ent->checked |= 1;
177 	    xmlFree(rep);
178 	    rep = NULL;
179 	}
180     }
181 
182     /*
183      * Prevent entity exponential check, not just replacement while
184      * parsing the DTD
185      * The check is potentially costly so do that only once in a thousand
186      */
187     if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188         (ctxt->nbentities % 1024 == 0)) {
189 	for (i = 0;i < ctxt->inputNr;i++) {
190 	    consumed += ctxt->inputTab[i]->consumed +
191 	               (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192 	}
193 	if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
194 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
195 	    ctxt->instate = XML_PARSER_EOF;
196 	    return (1);
197 	}
198 	consumed = 0;
199     }
200 
201 
202 
203     if (replacement != 0) {
204 	if (replacement < XML_MAX_TEXT_LENGTH)
205 	    return(0);
206 
207         /*
208 	 * If the volume of entity copy reaches 10 times the
209 	 * amount of parsed data and over the large text threshold
210 	 * then that's very likely to be an abuse.
211 	 */
212         if (ctxt->input != NULL) {
213 	    consumed = ctxt->input->consumed +
214 	               (ctxt->input->cur - ctxt->input->base);
215 	}
216         consumed += ctxt->sizeentities;
217 
218         if (replacement < XML_PARSER_NON_LINEAR * consumed)
219 	    return(0);
220     } else if (size != 0) {
221         /*
222          * Do the check based on the replacement size of the entity
223          */
224         if (size < XML_PARSER_BIG_ENTITY)
225 	    return(0);
226 
227         /*
228          * A limit on the amount of text data reasonably used
229          */
230         if (ctxt->input != NULL) {
231             consumed = ctxt->input->consumed +
232                 (ctxt->input->cur - ctxt->input->base);
233         }
234         consumed += ctxt->sizeentities;
235 
236         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238             return (0);
239     } else if (ent != NULL) {
240         /*
241          * use the number of parsed entities in the replacement
242          */
243         size = ent->checked / 2;
244 
245         /*
246          * The amount of data parsed counting entities size only once
247          */
248         if (ctxt->input != NULL) {
249             consumed = ctxt->input->consumed +
250                 (ctxt->input->cur - ctxt->input->base);
251         }
252         consumed += ctxt->sizeentities;
253 
254         /*
255          * Check the density of entities for the amount of data
256 	 * knowing an entity reference will take at least 3 bytes
257          */
258         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259             return (0);
260     } else {
261         /*
262          * strange we got no data for checking
263          */
264 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
265 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
266 	    (ctxt->nbentities <= 10000))
267 	    return (0);
268     }
269     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
270     return (1);
271 }
272 
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The variable is not declared static, so it has external linkage;
 * its initial value is 256.
 */
unsigned int xmlParserMaxDepth = 256;
282 
283 
284 
/*
 * NOTE(review): the four macros below are not used in the visible part of
 * the file; going by their names they look like a SAX2 enable flag, two
 * buffer sizes and a marker string for SAX compatibility mode - confirm
 * against their users.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
300 
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310 
311 
/*
 * More forward declarations of file-local (static) routines.
 * NOTE(review): their definitions are not in the visible part of the file.
 * xmlAddEntityReference is only declared when LIBXML_LEGACY_ENABLED is
 * defined.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
337 
338 /************************************************************************
339  *									*
340  *		Some factorized error routines				*
341  *									*
342  ************************************************************************/
343 
344 /**
345  * xmlErrAttributeDup:
346  * @ctxt:  an XML parser context
347  * @prefix:  the attribute prefix
348  * @localname:  the attribute localname
349  *
350  * Handle a redefinition of attribute error
351  */
352 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)353 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
354                    const xmlChar * localname)
355 {
356     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357         (ctxt->instate == XML_PARSER_EOF))
358 	return;
359     if (ctxt != NULL)
360 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
361 
362     if (prefix == NULL)
363         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
364                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
365                         (const char *) localname, NULL, NULL, 0, 0,
366                         "Attribute %s redefined\n", localname);
367     else
368         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
369                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
370                         (const char *) prefix, (const char *) localname,
371                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372                         localname);
373     if (ctxt != NULL) {
374 	ctxt->wellFormed = 0;
375 	if (ctxt->recovery == 0)
376 	    ctxt->disableSAX = 1;
377     }
378 }
379 
380 /**
381  * xmlFatalErr:
382  * @ctxt:  an XML parser context
383  * @error:  the error number
384  * @extra:  extra information string
385  *
386  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387  */
388 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)389 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
390 {
391     const char *errmsg;
392 
393     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394         (ctxt->instate == XML_PARSER_EOF))
395 	return;
396     switch (error) {
397         case XML_ERR_INVALID_HEX_CHARREF:
398             errmsg = "CharRef: invalid hexadecimal value";
399             break;
400         case XML_ERR_INVALID_DEC_CHARREF:
401             errmsg = "CharRef: invalid decimal value";
402             break;
403         case XML_ERR_INVALID_CHARREF:
404             errmsg = "CharRef: invalid value";
405             break;
406         case XML_ERR_INTERNAL_ERROR:
407             errmsg = "internal error";
408             break;
409         case XML_ERR_PEREF_AT_EOF:
410             errmsg = "PEReference at end of document";
411             break;
412         case XML_ERR_PEREF_IN_PROLOG:
413             errmsg = "PEReference in prolog";
414             break;
415         case XML_ERR_PEREF_IN_EPILOG:
416             errmsg = "PEReference in epilog";
417             break;
418         case XML_ERR_PEREF_NO_NAME:
419             errmsg = "PEReference: no name";
420             break;
421         case XML_ERR_PEREF_SEMICOL_MISSING:
422             errmsg = "PEReference: expecting ';'";
423             break;
424         case XML_ERR_ENTITY_LOOP:
425             errmsg = "Detected an entity reference loop";
426             break;
427         case XML_ERR_ENTITY_NOT_STARTED:
428             errmsg = "EntityValue: \" or ' expected";
429             break;
430         case XML_ERR_ENTITY_PE_INTERNAL:
431             errmsg = "PEReferences forbidden in internal subset";
432             break;
433         case XML_ERR_ENTITY_NOT_FINISHED:
434             errmsg = "EntityValue: \" or ' expected";
435             break;
436         case XML_ERR_ATTRIBUTE_NOT_STARTED:
437             errmsg = "AttValue: \" or ' expected";
438             break;
439         case XML_ERR_LT_IN_ATTRIBUTE:
440             errmsg = "Unescaped '<' not allowed in attributes values";
441             break;
442         case XML_ERR_LITERAL_NOT_STARTED:
443             errmsg = "SystemLiteral \" or ' expected";
444             break;
445         case XML_ERR_LITERAL_NOT_FINISHED:
446             errmsg = "Unfinished System or Public ID \" or ' expected";
447             break;
448         case XML_ERR_MISPLACED_CDATA_END:
449             errmsg = "Sequence ']]>' not allowed in content";
450             break;
451         case XML_ERR_URI_REQUIRED:
452             errmsg = "SYSTEM or PUBLIC, the URI is missing";
453             break;
454         case XML_ERR_PUBID_REQUIRED:
455             errmsg = "PUBLIC, the Public Identifier is missing";
456             break;
457         case XML_ERR_HYPHEN_IN_COMMENT:
458             errmsg = "Comment must not contain '--' (double-hyphen)";
459             break;
460         case XML_ERR_PI_NOT_STARTED:
461             errmsg = "xmlParsePI : no target name";
462             break;
463         case XML_ERR_RESERVED_XML_NAME:
464             errmsg = "Invalid PI name";
465             break;
466         case XML_ERR_NOTATION_NOT_STARTED:
467             errmsg = "NOTATION: Name expected here";
468             break;
469         case XML_ERR_NOTATION_NOT_FINISHED:
470             errmsg = "'>' required to close NOTATION declaration";
471             break;
472         case XML_ERR_VALUE_REQUIRED:
473             errmsg = "Entity value required";
474             break;
475         case XML_ERR_URI_FRAGMENT:
476             errmsg = "Fragment not allowed";
477             break;
478         case XML_ERR_ATTLIST_NOT_STARTED:
479             errmsg = "'(' required to start ATTLIST enumeration";
480             break;
481         case XML_ERR_NMTOKEN_REQUIRED:
482             errmsg = "NmToken expected in ATTLIST enumeration";
483             break;
484         case XML_ERR_ATTLIST_NOT_FINISHED:
485             errmsg = "')' required to finish ATTLIST enumeration";
486             break;
487         case XML_ERR_MIXED_NOT_STARTED:
488             errmsg = "MixedContentDecl : '|' or ')*' expected";
489             break;
490         case XML_ERR_PCDATA_REQUIRED:
491             errmsg = "MixedContentDecl : '#PCDATA' expected";
492             break;
493         case XML_ERR_ELEMCONTENT_NOT_STARTED:
494             errmsg = "ContentDecl : Name or '(' expected";
495             break;
496         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
497             errmsg = "ContentDecl : ',' '|' or ')' expected";
498             break;
499         case XML_ERR_PEREF_IN_INT_SUBSET:
500             errmsg =
501                 "PEReference: forbidden within markup decl in internal subset";
502             break;
503         case XML_ERR_GT_REQUIRED:
504             errmsg = "expected '>'";
505             break;
506         case XML_ERR_CONDSEC_INVALID:
507             errmsg = "XML conditional section '[' expected";
508             break;
509         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
510             errmsg = "Content error in the external subset";
511             break;
512         case XML_ERR_CONDSEC_INVALID_KEYWORD:
513             errmsg =
514                 "conditional section INCLUDE or IGNORE keyword expected";
515             break;
516         case XML_ERR_CONDSEC_NOT_FINISHED:
517             errmsg = "XML conditional section not closed";
518             break;
519         case XML_ERR_XMLDECL_NOT_STARTED:
520             errmsg = "Text declaration '<?xml' required";
521             break;
522         case XML_ERR_XMLDECL_NOT_FINISHED:
523             errmsg = "parsing XML declaration: '?>' expected";
524             break;
525         case XML_ERR_EXT_ENTITY_STANDALONE:
526             errmsg = "external parsed entities cannot be standalone";
527             break;
528         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
529             errmsg = "EntityRef: expecting ';'";
530             break;
531         case XML_ERR_DOCTYPE_NOT_FINISHED:
532             errmsg = "DOCTYPE improperly terminated";
533             break;
534         case XML_ERR_LTSLASH_REQUIRED:
535             errmsg = "EndTag: '</' not found";
536             break;
537         case XML_ERR_EQUAL_REQUIRED:
538             errmsg = "expected '='";
539             break;
540         case XML_ERR_STRING_NOT_CLOSED:
541             errmsg = "String not closed expecting \" or '";
542             break;
543         case XML_ERR_STRING_NOT_STARTED:
544             errmsg = "String not started expecting ' or \"";
545             break;
546         case XML_ERR_ENCODING_NAME:
547             errmsg = "Invalid XML encoding name";
548             break;
549         case XML_ERR_STANDALONE_VALUE:
550             errmsg = "standalone accepts only 'yes' or 'no'";
551             break;
552         case XML_ERR_DOCUMENT_EMPTY:
553             errmsg = "Document is empty";
554             break;
555         case XML_ERR_DOCUMENT_END:
556             errmsg = "Extra content at the end of the document";
557             break;
558         case XML_ERR_NOT_WELL_BALANCED:
559             errmsg = "chunk is not well balanced";
560             break;
561         case XML_ERR_EXTRA_CONTENT:
562             errmsg = "extra content at the end of well balanced chunk";
563             break;
564         case XML_ERR_VERSION_MISSING:
565             errmsg = "Malformed declaration expecting version";
566             break;
567         case XML_ERR_NAME_TOO_LONG:
568             errmsg = "Name too long use XML_PARSE_HUGE option";
569             break;
570 #if 0
571         case:
572             errmsg = "";
573             break;
574 #endif
575         default:
576             errmsg = "Unregistered error message";
577     }
578     if (ctxt != NULL)
579 	ctxt->errNo = error;
580     if (info == NULL) {
581         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583                         errmsg);
584     } else {
585         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587                         errmsg, info);
588     }
589     if (ctxt != NULL) {
590 	ctxt->wellFormed = 0;
591 	if (ctxt->recovery == 0)
592 	    ctxt->disableSAX = 1;
593     }
594 }
595 
596 /**
597  * xmlFatalErrMsg:
598  * @ctxt:  an XML parser context
599  * @error:  the error number
600  * @msg:  the error message
601  *
602  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603  */
604 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)605 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606                const char *msg)
607 {
608     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609         (ctxt->instate == XML_PARSER_EOF))
610 	return;
611     if (ctxt != NULL)
612 	ctxt->errNo = error;
613     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
614                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
615     if (ctxt != NULL) {
616 	ctxt->wellFormed = 0;
617 	if (ctxt->recovery == 0)
618 	    ctxt->disableSAX = 1;
619     }
620 }
621 
622 /**
623  * xmlWarningMsg:
624  * @ctxt:  an XML parser context
625  * @error:  the error number
626  * @msg:  the error message
627  * @str1:  extra data
628  * @str2:  extra data
629  *
630  * Handle a warning.
631  */
632 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)633 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634               const char *msg, const xmlChar *str1, const xmlChar *str2)
635 {
636     xmlStructuredErrorFunc schannel = NULL;
637 
638     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639         (ctxt->instate == XML_PARSER_EOF))
640 	return;
641     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642         (ctxt->sax->initialized == XML_SAX2_MAGIC))
643         schannel = ctxt->sax->serror;
644     if (ctxt != NULL) {
645         __xmlRaiseError(schannel,
646                     (ctxt->sax) ? ctxt->sax->warning : NULL,
647                     ctxt->userData,
648                     ctxt, NULL, XML_FROM_PARSER, error,
649                     XML_ERR_WARNING, NULL, 0,
650 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
651 		    msg, (const char *) str1, (const char *) str2);
652     } else {
653         __xmlRaiseError(schannel, NULL, NULL,
654                     ctxt, NULL, XML_FROM_PARSER, error,
655                     XML_ERR_WARNING, NULL, 0,
656 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
657 		    msg, (const char *) str1, (const char *) str2);
658     }
659 }
660 
661 /**
662  * xmlValidityError:
663  * @ctxt:  an XML parser context
664  * @error:  the error number
665  * @msg:  the error message
666  * @str1:  extra data
667  *
668  * Handle a validity error.
669  */
670 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)671 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
672               const char *msg, const xmlChar *str1, const xmlChar *str2)
673 {
674     xmlStructuredErrorFunc schannel = NULL;
675 
676     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677         (ctxt->instate == XML_PARSER_EOF))
678 	return;
679     if (ctxt != NULL) {
680 	ctxt->errNo = error;
681 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682 	    schannel = ctxt->sax->serror;
683     }
684     if (ctxt != NULL) {
685         __xmlRaiseError(schannel,
686                     ctxt->vctxt.error, ctxt->vctxt.userData,
687                     ctxt, NULL, XML_FROM_DTD, error,
688                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
689 		    (const char *) str2, NULL, 0, 0,
690 		    msg, (const char *) str1, (const char *) str2);
691 	ctxt->valid = 0;
692     } else {
693         __xmlRaiseError(schannel, NULL, NULL,
694                     ctxt, NULL, XML_FROM_DTD, error,
695                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
696 		    (const char *) str2, NULL, 0, 0,
697 		    msg, (const char *) str1, (const char *) str2);
698     }
699 }
700 
701 /**
702  * xmlFatalErrMsgInt:
703  * @ctxt:  an XML parser context
704  * @error:  the error number
705  * @msg:  the error message
706  * @val:  an integer value
707  *
708  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709  */
710 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)711 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
712                   const char *msg, int val)
713 {
714     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715         (ctxt->instate == XML_PARSER_EOF))
716 	return;
717     if (ctxt != NULL)
718 	ctxt->errNo = error;
719     __xmlRaiseError(NULL, NULL, NULL,
720                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
721                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
722     if (ctxt != NULL) {
723 	ctxt->wellFormed = 0;
724 	if (ctxt->recovery == 0)
725 	    ctxt->disableSAX = 1;
726     }
727 }
728 
729 /**
730  * xmlFatalErrMsgStrIntStr:
731  * @ctxt:  an XML parser context
732  * @error:  the error number
733  * @msg:  the error message
734  * @str1:  an string info
735  * @val:  an integer value
736  * @str2:  an string info
737  *
738  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739  */
740 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)741 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742                   const char *msg, const xmlChar *str1, int val,
743 		  const xmlChar *str2)
744 {
745     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746         (ctxt->instate == XML_PARSER_EOF))
747 	return;
748     if (ctxt != NULL)
749 	ctxt->errNo = error;
750     __xmlRaiseError(NULL, NULL, NULL,
751                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
752                     NULL, 0, (const char *) str1, (const char *) str2,
753 		    NULL, val, 0, msg, str1, val, str2);
754     if (ctxt != NULL) {
755 	ctxt->wellFormed = 0;
756 	if (ctxt->recovery == 0)
757 	    ctxt->disableSAX = 1;
758     }
759 }
760 
761 /**
762  * xmlFatalErrMsgStr:
763  * @ctxt:  an XML parser context
764  * @error:  the error number
765  * @msg:  the error message
766  * @val:  a string value
767  *
768  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
769  */
770 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)771 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
772                   const char *msg, const xmlChar * val)
773 {
774     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775         (ctxt->instate == XML_PARSER_EOF))
776 	return;
777     if (ctxt != NULL)
778 	ctxt->errNo = error;
779     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
780                     XML_FROM_PARSER, error, XML_ERR_FATAL,
781                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782                     val);
783     if (ctxt != NULL) {
784 	ctxt->wellFormed = 0;
785 	if (ctxt->recovery == 0)
786 	    ctxt->disableSAX = 1;
787     }
788 }
789 
790 /**
791  * xmlErrMsgStr:
792  * @ctxt:  an XML parser context
793  * @error:  the error number
794  * @msg:  the error message
795  * @val:  a string value
796  *
797  * Handle a non fatal parser error
798  */
799 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)800 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801                   const char *msg, const xmlChar * val)
802 {
803     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804         (ctxt->instate == XML_PARSER_EOF))
805 	return;
806     if (ctxt != NULL)
807 	ctxt->errNo = error;
808     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
809                     XML_FROM_PARSER, error, XML_ERR_ERROR,
810                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811                     val);
812 }
813 
814 /**
815  * xmlNsErr:
816  * @ctxt:  an XML parser context
817  * @error:  the error number
818  * @msg:  the message
819  * @info1:  extra information string
820  * @info2:  extra information string
821  *
822  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
823  */
824 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)825 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826          const char *msg,
827          const xmlChar * info1, const xmlChar * info2,
828          const xmlChar * info3)
829 {
830     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831         (ctxt->instate == XML_PARSER_EOF))
832 	return;
833     if (ctxt != NULL)
834 	ctxt->errNo = error;
835     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
836                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
837                     (const char *) info2, (const char *) info3, 0, 0, msg,
838                     info1, info2, info3);
839     if (ctxt != NULL)
840 	ctxt->nsWellFormed = 0;
841 }
842 
843 /**
844  * xmlNsWarn
845  * @ctxt:  an XML parser context
846  * @error:  the error number
847  * @msg:  the message
848  * @info1:  extra information string
849  * @info2:  extra information string
850  *
851  * Handle a namespace warning error
852  */
853 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)854 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855          const char *msg,
856          const xmlChar * info1, const xmlChar * info2,
857          const xmlChar * info3)
858 {
859     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860         (ctxt->instate == XML_PARSER_EOF))
861 	return;
862     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
864                     (const char *) info2, (const char *) info3, 0, 0, msg,
865                     info1, info2, info3);
866 }
867 
868 /************************************************************************
869  *									*
870  *		Library wide options					*
871  *									*
872  ************************************************************************/
873 
874 /**
875   * xmlHasFeature:
876   * @feature: the feature to be examined
877   *
878   * Examines if the library has been compiled with a given feature.
879   *
880   * Returns a non-zero value if the feature exist, otherwise zero.
881   * Returns zero (0) if the feature does not exist or an unknown
882   * unknown feature is requested, non-zero otherwise.
883   */
884 int
xmlHasFeature(xmlFeature feature)885 xmlHasFeature(xmlFeature feature)
886 {
887     switch (feature) {
888 	case XML_WITH_THREAD:
889 #ifdef LIBXML_THREAD_ENABLED
890 	    return(1);
891 #else
892 	    return(0);
893 #endif
894         case XML_WITH_TREE:
895 #ifdef LIBXML_TREE_ENABLED
896             return(1);
897 #else
898             return(0);
899 #endif
900         case XML_WITH_OUTPUT:
901 #ifdef LIBXML_OUTPUT_ENABLED
902             return(1);
903 #else
904             return(0);
905 #endif
906         case XML_WITH_PUSH:
907 #ifdef LIBXML_PUSH_ENABLED
908             return(1);
909 #else
910             return(0);
911 #endif
912         case XML_WITH_READER:
913 #ifdef LIBXML_READER_ENABLED
914             return(1);
915 #else
916             return(0);
917 #endif
918         case XML_WITH_PATTERN:
919 #ifdef LIBXML_PATTERN_ENABLED
920             return(1);
921 #else
922             return(0);
923 #endif
924         case XML_WITH_WRITER:
925 #ifdef LIBXML_WRITER_ENABLED
926             return(1);
927 #else
928             return(0);
929 #endif
930         case XML_WITH_SAX1:
931 #ifdef LIBXML_SAX1_ENABLED
932             return(1);
933 #else
934             return(0);
935 #endif
936         case XML_WITH_FTP:
937 #ifdef LIBXML_FTP_ENABLED
938             return(1);
939 #else
940             return(0);
941 #endif
942         case XML_WITH_HTTP:
943 #ifdef LIBXML_HTTP_ENABLED
944             return(1);
945 #else
946             return(0);
947 #endif
948         case XML_WITH_VALID:
949 #ifdef LIBXML_VALID_ENABLED
950             return(1);
951 #else
952             return(0);
953 #endif
954         case XML_WITH_HTML:
955 #ifdef LIBXML_HTML_ENABLED
956             return(1);
957 #else
958             return(0);
959 #endif
960         case XML_WITH_LEGACY:
961 #ifdef LIBXML_LEGACY_ENABLED
962             return(1);
963 #else
964             return(0);
965 #endif
966         case XML_WITH_C14N:
967 #ifdef LIBXML_C14N_ENABLED
968             return(1);
969 #else
970             return(0);
971 #endif
972         case XML_WITH_CATALOG:
973 #ifdef LIBXML_CATALOG_ENABLED
974             return(1);
975 #else
976             return(0);
977 #endif
978         case XML_WITH_XPATH:
979 #ifdef LIBXML_XPATH_ENABLED
980             return(1);
981 #else
982             return(0);
983 #endif
984         case XML_WITH_XPTR:
985 #ifdef LIBXML_XPTR_ENABLED
986             return(1);
987 #else
988             return(0);
989 #endif
990         case XML_WITH_XINCLUDE:
991 #ifdef LIBXML_XINCLUDE_ENABLED
992             return(1);
993 #else
994             return(0);
995 #endif
996         case XML_WITH_ICONV:
997 #ifdef LIBXML_ICONV_ENABLED
998             return(1);
999 #else
1000             return(0);
1001 #endif
1002         case XML_WITH_ISO8859X:
1003 #ifdef LIBXML_ISO8859X_ENABLED
1004             return(1);
1005 #else
1006             return(0);
1007 #endif
1008         case XML_WITH_UNICODE:
1009 #ifdef LIBXML_UNICODE_ENABLED
1010             return(1);
1011 #else
1012             return(0);
1013 #endif
1014         case XML_WITH_REGEXP:
1015 #ifdef LIBXML_REGEXP_ENABLED
1016             return(1);
1017 #else
1018             return(0);
1019 #endif
1020         case XML_WITH_AUTOMATA:
1021 #ifdef LIBXML_AUTOMATA_ENABLED
1022             return(1);
1023 #else
1024             return(0);
1025 #endif
1026         case XML_WITH_EXPR:
1027 #ifdef LIBXML_EXPR_ENABLED
1028             return(1);
1029 #else
1030             return(0);
1031 #endif
1032         case XML_WITH_SCHEMAS:
1033 #ifdef LIBXML_SCHEMAS_ENABLED
1034             return(1);
1035 #else
1036             return(0);
1037 #endif
1038         case XML_WITH_SCHEMATRON:
1039 #ifdef LIBXML_SCHEMATRON_ENABLED
1040             return(1);
1041 #else
1042             return(0);
1043 #endif
1044         case XML_WITH_MODULES:
1045 #ifdef LIBXML_MODULES_ENABLED
1046             return(1);
1047 #else
1048             return(0);
1049 #endif
1050         case XML_WITH_DEBUG:
1051 #ifdef LIBXML_DEBUG_ENABLED
1052             return(1);
1053 #else
1054             return(0);
1055 #endif
1056         case XML_WITH_DEBUG_MEM:
1057 #ifdef DEBUG_MEMORY_LOCATION
1058             return(1);
1059 #else
1060             return(0);
1061 #endif
1062         case XML_WITH_DEBUG_RUN:
1063 #ifdef LIBXML_DEBUG_RUNTIME
1064             return(1);
1065 #else
1066             return(0);
1067 #endif
1068         case XML_WITH_ZLIB:
1069 #ifdef LIBXML_ZLIB_ENABLED
1070             return(1);
1071 #else
1072             return(0);
1073 #endif
1074         case XML_WITH_LZMA:
1075 #ifdef LIBXML_LZMA_ENABLED
1076             return(1);
1077 #else
1078             return(0);
1079 #endif
1080         case XML_WITH_ICU:
1081 #ifdef LIBXML_ICU_ENABLED
1082             return(1);
1083 #else
1084             return(0);
1085 #endif
1086         default:
1087 	    break;
1088      }
1089      return(0);
1090 }
1091 
1092 /************************************************************************
1093  *									*
1094  *		SAX2 defaulted attributes handling			*
1095  *									*
1096  ************************************************************************/
1097 
1098 /**
1099  * xmlDetectSAX2:
1100  * @ctxt:  an XML parser context
1101  *
1102  * Do the SAX2 detection and specific initialization
1103  */
1104 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1105 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1106     xmlSAXHandlerPtr sax;
1107     if (ctxt == NULL) return;
1108     sax = ctxt->sax;
1109 #ifdef LIBXML_SAX1_ENABLED
1110     if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1111         ((sax->startElementNs != NULL) ||
1112          (sax->endElementNs != NULL) ||
1113          ((sax->startElement == NULL) && (sax->endElement == NULL))))
1114         ctxt->sax2 = 1;
1115 #else
1116     ctxt->sax2 = 1;
1117 #endif /* LIBXML_SAX1_ENABLED */
1118 
1119     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1120     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1121     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1122     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1123 		(ctxt->str_xml_ns == NULL)) {
1124         xmlErrMemory(ctxt, NULL);
1125     }
1126 }
1127 
1128 typedef struct _xmlDefAttrs xmlDefAttrs;
1129 typedef xmlDefAttrs *xmlDefAttrsPtr;
1130 struct _xmlDefAttrs {
1131     int nbAttrs;	/* number of defaulted attributes on that element */
1132     int maxAttrs;       /* the size of the array */
1133 #if __STDC_VERSION__ >= 199901L
1134     /* Using a C99 flexible array member avoids UBSan errors. */
1135     const xmlChar *values[]; /* array of localname/prefix/values/external */
1136 #else
1137     const xmlChar *values[5];
1138 #endif
1139 };
1140 
1141 /**
1142  * xmlAttrNormalizeSpace:
1143  * @src: the source string
1144  * @dst: the target string
1145  *
1146  * Normalize the space in non CDATA attribute values:
1147  * If the attribute type is not CDATA, then the XML processor MUST further
1148  * process the normalized attribute value by discarding any leading and
1149  * trailing space (#x20) characters, and by replacing sequences of space
1150  * (#x20) characters by a single space (#x20) character.
1151  * Note that the size of dst need to be at least src, and if one doesn't need
1152  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1153  * passing src as dst is just fine.
1154  *
1155  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1156  *         is needed.
1157  */
1158 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1159 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1160 {
1161     if ((src == NULL) || (dst == NULL))
1162         return(NULL);
1163 
1164     while (*src == 0x20) src++;
1165     while (*src != 0) {
1166 	if (*src == 0x20) {
1167 	    while (*src == 0x20) src++;
1168 	    if (*src != 0)
1169 		*dst++ = 0x20;
1170 	} else {
1171 	    *dst++ = *src++;
1172 	}
1173     }
1174     *dst = 0;
1175     if (dst == src)
1176        return(NULL);
1177     return(dst);
1178 }
1179 
1180 /**
1181  * xmlAttrNormalizeSpace2:
1182  * @src: the source string
1183  *
1184  * Normalize the space in non CDATA attribute values, a slightly more complex
1185  * front end to avoid allocation problems when running on attribute values
1186  * coming from the input.
1187  *
1188  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1189  *         is needed.
1190  */
1191 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1192 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1193 {
1194     int i;
1195     int remove_head = 0;
1196     int need_realloc = 0;
1197     const xmlChar *cur;
1198 
1199     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1200         return(NULL);
1201     i = *len;
1202     if (i <= 0)
1203         return(NULL);
1204 
1205     cur = src;
1206     while (*cur == 0x20) {
1207         cur++;
1208 	remove_head++;
1209     }
1210     while (*cur != 0) {
1211 	if (*cur == 0x20) {
1212 	    cur++;
1213 	    if ((*cur == 0x20) || (*cur == 0)) {
1214 	        need_realloc = 1;
1215 		break;
1216 	    }
1217 	} else
1218 	    cur++;
1219     }
1220     if (need_realloc) {
1221         xmlChar *ret;
1222 
1223 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1224 	if (ret == NULL) {
1225 	    xmlErrMemory(ctxt, NULL);
1226 	    return(NULL);
1227 	}
1228 	xmlAttrNormalizeSpace(ret, ret);
1229 	*len = (int) strlen((const char *)ret);
1230         return(ret);
1231     } else if (remove_head) {
1232         *len -= remove_head;
1233         memmove(src, src + remove_head, 1 + *len);
1234 	return(src);
1235     }
1236     return(NULL);
1237 }
1238 
1239 /**
1240  * xmlAddDefAttrs:
1241  * @ctxt:  an XML parser context
1242  * @fullname:  the element fullname
1243  * @fullattr:  the attribute fullname
1244  * @value:  the attribute value
1245  *
1246  * Add a defaulted attribute for an element
1247  */
1248 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1249 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1250                const xmlChar *fullname,
1251                const xmlChar *fullattr,
1252                const xmlChar *value) {
1253     xmlDefAttrsPtr defaults;
1254     int len;
1255     const xmlChar *name;
1256     const xmlChar *prefix;
1257 
1258     /*
1259      * Allows to detect attribute redefinitions
1260      */
1261     if (ctxt->attsSpecial != NULL) {
1262         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1263 	    return;
1264     }
1265 
1266     if (ctxt->attsDefault == NULL) {
1267         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1268 	if (ctxt->attsDefault == NULL)
1269 	    goto mem_error;
1270     }
1271 
1272     /*
1273      * split the element name into prefix:localname , the string found
1274      * are within the DTD and then not associated to namespace names.
1275      */
1276     name = xmlSplitQName3(fullname, &len);
1277     if (name == NULL) {
1278         name = xmlDictLookup(ctxt->dict, fullname, -1);
1279 	prefix = NULL;
1280     } else {
1281         name = xmlDictLookup(ctxt->dict, name, -1);
1282 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1283     }
1284 
1285     /*
1286      * make sure there is some storage
1287      */
1288     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1289     if (defaults == NULL) {
1290         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1291 	                   (4 * 5) * sizeof(const xmlChar *));
1292 	if (defaults == NULL)
1293 	    goto mem_error;
1294 	defaults->nbAttrs = 0;
1295 	defaults->maxAttrs = 4;
1296 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 	                        defaults, NULL) < 0) {
1298 	    xmlFree(defaults);
1299 	    goto mem_error;
1300 	}
1301     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1302         xmlDefAttrsPtr temp;
1303 
1304         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1305 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1306 	if (temp == NULL)
1307 	    goto mem_error;
1308 	defaults = temp;
1309 	defaults->maxAttrs *= 2;
1310 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1311 	                        defaults, NULL) < 0) {
1312 	    xmlFree(defaults);
1313 	    goto mem_error;
1314 	}
1315     }
1316 
1317     /*
1318      * Split the element name into prefix:localname , the string found
1319      * are within the DTD and hen not associated to namespace names.
1320      */
1321     name = xmlSplitQName3(fullattr, &len);
1322     if (name == NULL) {
1323         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1324 	prefix = NULL;
1325     } else {
1326         name = xmlDictLookup(ctxt->dict, name, -1);
1327 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1328     }
1329 
1330     defaults->values[5 * defaults->nbAttrs] = name;
1331     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1332     /* intern the string and precompute the end */
1333     len = xmlStrlen(value);
1334     value = xmlDictLookup(ctxt->dict, value, len);
1335     defaults->values[5 * defaults->nbAttrs + 2] = value;
1336     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1337     if (ctxt->external)
1338         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1339     else
1340         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1341     defaults->nbAttrs++;
1342 
1343     return;
1344 
1345 mem_error:
1346     xmlErrMemory(ctxt, NULL);
1347     return;
1348 }
1349 
1350 /**
1351  * xmlAddSpecialAttr:
1352  * @ctxt:  an XML parser context
1353  * @fullname:  the element fullname
1354  * @fullattr:  the attribute fullname
1355  * @type:  the attribute type
1356  *
1357  * Register this attribute type
1358  */
1359 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1360 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1361 		  const xmlChar *fullname,
1362 		  const xmlChar *fullattr,
1363 		  int type)
1364 {
1365     if (ctxt->attsSpecial == NULL) {
1366         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1367 	if (ctxt->attsSpecial == NULL)
1368 	    goto mem_error;
1369     }
1370 
1371     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1372         return;
1373 
1374     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1375                      (void *) (ptrdiff_t) type);
1376     return;
1377 
1378 mem_error:
1379     xmlErrMemory(ctxt, NULL);
1380     return;
1381 }
1382 
1383 /**
1384  * xmlCleanSpecialAttrCallback:
1385  *
1386  * Removes CDATA attributes from the special attribute table
1387  */
1388 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1389 xmlCleanSpecialAttrCallback(void *payload, void *data,
1390                             const xmlChar *fullname, const xmlChar *fullattr,
1391                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1392     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1393 
1394     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1395         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1396     }
1397 }
1398 
1399 /**
1400  * xmlCleanSpecialAttr:
1401  * @ctxt:  an XML parser context
1402  *
1403  * Trim the list of attributes defined to remove all those of type
1404  * CDATA as they are not special. This call should be done when finishing
1405  * to parse the DTD and before starting to parse the document root.
1406  */
1407 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1408 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1409 {
1410     if (ctxt->attsSpecial == NULL)
1411         return;
1412 
1413     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1414 
1415     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1416         xmlHashFree(ctxt->attsSpecial, NULL);
1417         ctxt->attsSpecial = NULL;
1418     }
1419     return;
1420 }
1421 
1422 /**
1423  * xmlCheckLanguageID:
1424  * @lang:  pointer to the string value
1425  *
1426  * Checks that the value conforms to the LanguageID production:
1427  *
1428  * NOTE: this is somewhat deprecated, those productions were removed from
1429  *       the XML Second edition.
1430  *
1431  * [33] LanguageID ::= Langcode ('-' Subcode)*
1432  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1433  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1434  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1435  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1436  * [38] Subcode ::= ([a-z] | [A-Z])+
1437  *
1438  * The current REC reference the successors of RFC 1766, currently 5646
1439  *
1440  * http://www.rfc-editor.org/rfc/rfc5646.txt
1441  * langtag       = language
1442  *                 ["-" script]
1443  *                 ["-" region]
1444  *                 *("-" variant)
1445  *                 *("-" extension)
1446  *                 ["-" privateuse]
1447  * language      = 2*3ALPHA            ; shortest ISO 639 code
1448  *                 ["-" extlang]       ; sometimes followed by
1449  *                                     ; extended language subtags
1450  *               / 4ALPHA              ; or reserved for future use
1451  *               / 5*8ALPHA            ; or registered language subtag
1452  *
1453  * extlang       = 3ALPHA              ; selected ISO 639 codes
1454  *                 *2("-" 3ALPHA)      ; permanently reserved
1455  *
1456  * script        = 4ALPHA              ; ISO 15924 code
1457  *
1458  * region        = 2ALPHA              ; ISO 3166-1 code
1459  *               / 3DIGIT              ; UN M.49 code
1460  *
1461  * variant       = 5*8alphanum         ; registered variants
1462  *               / (DIGIT 3alphanum)
1463  *
1464  * extension     = singleton 1*("-" (2*8alphanum))
1465  *
1466  *                                     ; Single alphanumerics
1467  *                                     ; "x" reserved for private use
1468  * singleton     = DIGIT               ; 0 - 9
1469  *               / %x41-57             ; A - W
1470  *               / %x59-5A             ; Y - Z
1471  *               / %x61-77             ; a - w
1472  *               / %x79-7A             ; y - z
1473  *
1474  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1475  * The parser below doesn't try to cope with extension or privateuse
1476  * that could be added but that's not interoperable anyway
1477  *
1478  * Returns 1 if correct 0 otherwise
1479  **/
1480 int
xmlCheckLanguageID(const xmlChar * lang)1481 xmlCheckLanguageID(const xmlChar * lang)
1482 {
1483     const xmlChar *cur = lang, *nxt;
1484 
1485     if (cur == NULL)
1486         return (0);
1487     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1488         ((cur[0] == 'I') && (cur[1] == '-')) ||
1489         ((cur[0] == 'x') && (cur[1] == '-')) ||
1490         ((cur[0] == 'X') && (cur[1] == '-'))) {
1491         /*
1492          * Still allow IANA code and user code which were coming
1493          * from the previous version of the XML-1.0 specification
1494          * it's deprecated but we should not fail
1495          */
1496         cur += 2;
1497         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1498                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1499             cur++;
1500         return(cur[0] == 0);
1501     }
1502     nxt = cur;
1503     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505            nxt++;
1506     if (nxt - cur >= 4) {
1507         /*
1508          * Reserved
1509          */
1510         if ((nxt - cur > 8) || (nxt[0] != 0))
1511             return(0);
1512         return(1);
1513     }
1514     if (nxt - cur < 2)
1515         return(0);
1516     /* we got an ISO 639 code */
1517     if (nxt[0] == 0)
1518         return(1);
1519     if (nxt[0] != '-')
1520         return(0);
1521 
1522     nxt++;
1523     cur = nxt;
1524     /* now we can have extlang or script or region or variant */
1525     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526         goto region_m49;
1527 
1528     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530            nxt++;
1531     if (nxt - cur == 4)
1532         goto script;
1533     if (nxt - cur == 2)
1534         goto region;
1535     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536         goto variant;
1537     if (nxt - cur != 3)
1538         return(0);
1539     /* we parsed an extlang */
1540     if (nxt[0] == 0)
1541         return(1);
1542     if (nxt[0] != '-')
1543         return(0);
1544 
1545     nxt++;
1546     cur = nxt;
1547     /* now we can have script or region or variant */
1548     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1549         goto region_m49;
1550 
1551     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1552            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1553            nxt++;
1554     if (nxt - cur == 2)
1555         goto region;
1556     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1557         goto variant;
1558     if (nxt - cur != 4)
1559         return(0);
1560     /* we parsed a script */
1561 script:
1562     if (nxt[0] == 0)
1563         return(1);
1564     if (nxt[0] != '-')
1565         return(0);
1566 
1567     nxt++;
1568     cur = nxt;
1569     /* now we can have region or variant */
1570     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1571         goto region_m49;
1572 
1573     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1574            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1575            nxt++;
1576 
1577     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1578         goto variant;
1579     if (nxt - cur != 2)
1580         return(0);
1581     /* we parsed a region */
1582 region:
1583     if (nxt[0] == 0)
1584         return(1);
1585     if (nxt[0] != '-')
1586         return(0);
1587 
1588     nxt++;
1589     cur = nxt;
1590     /* now we can just have a variant */
1591     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1592            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1593            nxt++;
1594 
1595     if ((nxt - cur < 5) || (nxt - cur > 8))
1596         return(0);
1597 
1598     /* we parsed a variant */
1599 variant:
1600     if (nxt[0] == 0)
1601         return(1);
1602     if (nxt[0] != '-')
1603         return(0);
1604     /* extensions and private use subtags not checked */
1605     return (1);
1606 
1607 region_m49:
1608     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1609         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1610         nxt += 3;
1611         goto region;
1612     }
1613     return(0);
1614 }
1615 
1616 /************************************************************************
1617  *									*
1618  *		Parser stacks related functions and macros		*
1619  *									*
1620  ************************************************************************/
1621 
1622 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1623                                             const xmlChar ** str);
1624 
1625 #ifdef SAX2
1626 /**
1627  * nsPush:
1628  * @ctxt:  an XML parser context
1629  * @prefix:  the namespace prefix or NULL
1630  * @URL:  the namespace name
1631  *
1632  * Pushes a new parser namespace on top of the ns stack
1633  *
1634  * Returns -1 in case of error, -2 if the namespace should be discarded
1635  *	   and the index in the stack otherwise.
1636  */
1637 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1638 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1639 {
1640     if (ctxt->options & XML_PARSE_NSCLEAN) {
1641         int i;
1642 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1643 	    if (ctxt->nsTab[i] == prefix) {
1644 		/* in scope */
1645 	        if (ctxt->nsTab[i + 1] == URL)
1646 		    return(-2);
1647 		/* out of scope keep it */
1648 		break;
1649 	    }
1650 	}
1651     }
1652     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1653 	ctxt->nsMax = 10;
1654 	ctxt->nsNr = 0;
1655 	ctxt->nsTab = (const xmlChar **)
1656 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1657 	if (ctxt->nsTab == NULL) {
1658 	    xmlErrMemory(ctxt, NULL);
1659 	    ctxt->nsMax = 0;
1660             return (-1);
1661 	}
1662     } else if (ctxt->nsNr >= ctxt->nsMax) {
1663         const xmlChar ** tmp;
1664         ctxt->nsMax *= 2;
1665         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1666 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1667         if (tmp == NULL) {
1668             xmlErrMemory(ctxt, NULL);
1669 	    ctxt->nsMax /= 2;
1670             return (-1);
1671         }
1672 	ctxt->nsTab = tmp;
1673     }
1674     ctxt->nsTab[ctxt->nsNr++] = prefix;
1675     ctxt->nsTab[ctxt->nsNr++] = URL;
1676     return (ctxt->nsNr);
1677 }
1678 /**
1679  * nsPop:
1680  * @ctxt: an XML parser context
1681  * @nr:  the number to pop
1682  *
1683  * Pops the top @nr parser prefix/namespace from the ns stack
1684  *
1685  * Returns the number of namespaces removed
1686  */
1687 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1688 nsPop(xmlParserCtxtPtr ctxt, int nr)
1689 {
1690     int i;
1691 
1692     if (ctxt->nsTab == NULL) return(0);
1693     if (ctxt->nsNr < nr) {
1694         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1695         nr = ctxt->nsNr;
1696     }
1697     if (ctxt->nsNr <= 0)
1698         return (0);
1699 
1700     for (i = 0;i < nr;i++) {
1701          ctxt->nsNr--;
1702 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1703     }
1704     return(nr);
1705 }
1706 #endif
1707 
1708 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1709 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1710     const xmlChar **atts;
1711     int *attallocs;
1712     int maxatts;
1713 
1714     if (ctxt->atts == NULL) {
1715 	maxatts = 55; /* allow for 10 attrs by default */
1716 	atts = (const xmlChar **)
1717 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1718 	if (atts == NULL) goto mem_error;
1719 	ctxt->atts = atts;
1720 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1721 	if (attallocs == NULL) goto mem_error;
1722 	ctxt->attallocs = attallocs;
1723 	ctxt->maxatts = maxatts;
1724     } else if (nr + 5 > ctxt->maxatts) {
1725 	maxatts = (nr + 5) * 2;
1726 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1727 				     maxatts * sizeof(const xmlChar *));
1728 	if (atts == NULL) goto mem_error;
1729 	ctxt->atts = atts;
1730 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1731 	                             (maxatts / 5) * sizeof(int));
1732 	if (attallocs == NULL) goto mem_error;
1733 	ctxt->attallocs = attallocs;
1734 	ctxt->maxatts = maxatts;
1735     }
1736     return(ctxt->maxatts);
1737 mem_error:
1738     xmlErrMemory(ctxt, NULL);
1739     return(-1);
1740 }
1741 
1742 /**
1743  * inputPush:
1744  * @ctxt:  an XML parser context
1745  * @value:  the parser input
1746  *
1747  * Pushes a new parser input on top of the input stack
1748  *
1749  * Returns -1 in case of error, the index in the stack otherwise
1750  */
1751 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1752 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1753 {
1754     if ((ctxt == NULL) || (value == NULL))
1755         return(-1);
1756     if (ctxt->inputNr >= ctxt->inputMax) {
1757         ctxt->inputMax *= 2;
1758         ctxt->inputTab =
1759             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1760                                              ctxt->inputMax *
1761                                              sizeof(ctxt->inputTab[0]));
1762         if (ctxt->inputTab == NULL) {
1763             xmlErrMemory(ctxt, NULL);
1764 	    xmlFreeInputStream(value);
1765 	    ctxt->inputMax /= 2;
1766 	    value = NULL;
1767             return (-1);
1768         }
1769     }
1770     ctxt->inputTab[ctxt->inputNr] = value;
1771     ctxt->input = value;
1772     return (ctxt->inputNr++);
1773 }
1774 /**
1775  * inputPop:
1776  * @ctxt: an XML parser context
1777  *
1778  * Pops the top parser input from the input stack
1779  *
1780  * Returns the input just removed
1781  */
1782 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1783 inputPop(xmlParserCtxtPtr ctxt)
1784 {
1785     xmlParserInputPtr ret;
1786 
1787     if (ctxt == NULL)
1788         return(NULL);
1789     if (ctxt->inputNr <= 0)
1790         return (NULL);
1791     ctxt->inputNr--;
1792     if (ctxt->inputNr > 0)
1793         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1794     else
1795         ctxt->input = NULL;
1796     ret = ctxt->inputTab[ctxt->inputNr];
1797     ctxt->inputTab[ctxt->inputNr] = NULL;
1798     return (ret);
1799 }
1800 /**
1801  * nodePush:
1802  * @ctxt:  an XML parser context
1803  * @value:  the element node
1804  *
1805  * Pushes a new element node on top of the node stack
1806  *
1807  * Returns -1 in case of error, the index in the stack otherwise
1808  */
1809 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1810 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1811 {
1812     if (ctxt == NULL) return(0);
1813     if (ctxt->nodeNr >= ctxt->nodeMax) {
1814         xmlNodePtr *tmp;
1815 
1816 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1817                                       ctxt->nodeMax * 2 *
1818                                       sizeof(ctxt->nodeTab[0]));
1819         if (tmp == NULL) {
1820             xmlErrMemory(ctxt, NULL);
1821             return (-1);
1822         }
1823         ctxt->nodeTab = tmp;
1824 	ctxt->nodeMax *= 2;
1825     }
1826     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1827         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1828 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1829 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1830 			  xmlParserMaxDepth);
1831 	xmlHaltParser(ctxt);
1832 	return(-1);
1833     }
1834     ctxt->nodeTab[ctxt->nodeNr] = value;
1835     ctxt->node = value;
1836     return (ctxt->nodeNr++);
1837 }
1838 
1839 /**
1840  * nodePop:
1841  * @ctxt: an XML parser context
1842  *
1843  * Pops the top element node from the node stack
1844  *
1845  * Returns the node just removed
1846  */
1847 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1848 nodePop(xmlParserCtxtPtr ctxt)
1849 {
1850     xmlNodePtr ret;
1851 
1852     if (ctxt == NULL) return(NULL);
1853     if (ctxt->nodeNr <= 0)
1854         return (NULL);
1855     ctxt->nodeNr--;
1856     if (ctxt->nodeNr > 0)
1857         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1858     else
1859         ctxt->node = NULL;
1860     ret = ctxt->nodeTab[ctxt->nodeNr];
1861     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1862     return (ret);
1863 }
1864 
1865 /**
1866  * nameNsPush:
1867  * @ctxt:  an XML parser context
1868  * @value:  the element name
1869  * @prefix:  the element prefix
1870  * @URI:  the element namespace name
1871  * @line:  the current line number for error messages
1872  * @nsNr:  the number of namespaces pushed on the namespace table
1873  *
1874  * Pushes a new element name/prefix/URL on top of the name stack
1875  *
1876  * Returns -1 in case of error, the index in the stack otherwise
1877  */
1878 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1879 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1880            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1881 {
1882     xmlStartTag *tag;
1883 
1884     if (ctxt->nameNr >= ctxt->nameMax) {
1885         const xmlChar * *tmp;
1886         xmlStartTag *tmp2;
1887         ctxt->nameMax *= 2;
1888         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1889                                     ctxt->nameMax *
1890                                     sizeof(ctxt->nameTab[0]));
1891         if (tmp == NULL) {
1892 	    ctxt->nameMax /= 2;
1893 	    goto mem_error;
1894         }
1895 	ctxt->nameTab = tmp;
1896         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1897                                     ctxt->nameMax *
1898                                     sizeof(ctxt->pushTab[0]));
1899         if (tmp2 == NULL) {
1900 	    ctxt->nameMax /= 2;
1901 	    goto mem_error;
1902         }
1903 	ctxt->pushTab = tmp2;
1904     } else if (ctxt->pushTab == NULL) {
1905         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1906                                             sizeof(ctxt->pushTab[0]));
1907         if (ctxt->pushTab == NULL)
1908             goto mem_error;
1909     }
1910     ctxt->nameTab[ctxt->nameNr] = value;
1911     ctxt->name = value;
1912     tag = &ctxt->pushTab[ctxt->nameNr];
1913     tag->prefix = prefix;
1914     tag->URI = URI;
1915     tag->line = line;
1916     tag->nsNr = nsNr;
1917     return (ctxt->nameNr++);
1918 mem_error:
1919     xmlErrMemory(ctxt, NULL);
1920     return (-1);
1921 }
1922 #ifdef LIBXML_PUSH_ENABLED
1923 /**
1924  * nameNsPop:
1925  * @ctxt: an XML parser context
1926  *
1927  * Pops the top element/prefix/URI name from the name stack
1928  *
1929  * Returns the name just removed
1930  */
1931 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1932 nameNsPop(xmlParserCtxtPtr ctxt)
1933 {
1934     const xmlChar *ret;
1935 
1936     if (ctxt->nameNr <= 0)
1937         return (NULL);
1938     ctxt->nameNr--;
1939     if (ctxt->nameNr > 0)
1940         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1941     else
1942         ctxt->name = NULL;
1943     ret = ctxt->nameTab[ctxt->nameNr];
1944     ctxt->nameTab[ctxt->nameNr] = NULL;
1945     return (ret);
1946 }
1947 #endif /* LIBXML_PUSH_ENABLED */
1948 
1949 /**
1950  * namePush:
1951  * @ctxt:  an XML parser context
1952  * @value:  the element name
1953  *
1954  * Pushes a new element name on top of the name stack
1955  *
1956  * Returns -1 in case of error, the index in the stack otherwise
1957  */
1958 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1959 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1960 {
1961     if (ctxt == NULL) return (-1);
1962 
1963     if (ctxt->nameNr >= ctxt->nameMax) {
1964         const xmlChar * *tmp;
1965         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1966                                     ctxt->nameMax * 2 *
1967                                     sizeof(ctxt->nameTab[0]));
1968         if (tmp == NULL) {
1969 	    goto mem_error;
1970         }
1971 	ctxt->nameTab = tmp;
1972         ctxt->nameMax *= 2;
1973     }
1974     ctxt->nameTab[ctxt->nameNr] = value;
1975     ctxt->name = value;
1976     return (ctxt->nameNr++);
1977 mem_error:
1978     xmlErrMemory(ctxt, NULL);
1979     return (-1);
1980 }
1981 /**
1982  * namePop:
1983  * @ctxt: an XML parser context
1984  *
1985  * Pops the top element name from the name stack
1986  *
1987  * Returns the name just removed
1988  */
1989 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1990 namePop(xmlParserCtxtPtr ctxt)
1991 {
1992     const xmlChar *ret;
1993 
1994     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1995         return (NULL);
1996     ctxt->nameNr--;
1997     if (ctxt->nameNr > 0)
1998         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1999     else
2000         ctxt->name = NULL;
2001     ret = ctxt->nameTab[ctxt->nameNr];
2002     ctxt->nameTab[ctxt->nameNr] = NULL;
2003     return (ret);
2004 }
2005 
/*
 * spacePush: push @val on top of the space stack of @ctxt, doubling the
 * storage when the stack is full, and make ctxt->space point at the new
 * top entry.
 *
 * Returns the index of the pushed entry, or -1 if the stack could not
 * be grown.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int previousMax = ctxt->spaceMax;
        int *grown;

        ctxt->spaceMax = previousMax * 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old table is still valid, restore its capacity */
            ctxt->spaceMax = previousMax;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
2024 
/*
 * spacePop: remove the top entry of the space stack of @ctxt. ctxt->space
 * is repositioned on the new top entry, or on slot 0 when the stack
 * became empty; the vacated slot is reset to -1.
 *
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2037 
2038 /*
2039  * Macros for accessing the content. Those should be used only by the parser,
2040  * and not exported.
2041  *
2042  * Dirty macros, i.e. one often need to make assumption on the context to
2043  * use them
2044  *
2045  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2046  *           To be used with extreme caution since operations consuming
2047  *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2060  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2061  *
2062  *   NEXT    Skip to the next character, this does the proper decoding
2063  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2064  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2065  *   CUR_CHAR(l) returns the current unicode character (int), set l
2066  *           to the number of xmlChars used for the encoding [0-5].
2067  *   CUR_SCHAR  same but operate on a string instead of the context
2068  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2069  *            the index
2070  *   GROW, SHRINK  handling of input buffers
2071  */
2072 
/*
 * Direct accessors on the current input of the variable named `ctxt`
 * that must be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are equal to
 * c1..cn, compared as unsigned char. Bytes are tested left to right and
 * the test stops at the first mismatch. Every macro argument is
 * parenthesized so that expressions such as `p + 1` or `a | b` can be
 * passed safely; @s is still expanded once per compared byte, so it must
 * not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2096 
/*
 * SKIP(val): advance the input cursor and the column counter of `ctxt`
 * by @val bytes, then ask for more input if the cursor landed on a 0
 * byte. @val is expanded twice, so it must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input cursor of `ctxt` by @val bytes one at a
 * time, incrementing the line counter and resetting the column to 1 on
 * each '\n' and incrementing the column otherwise, then ask for more
 * input if the cursor landed on a 0 byte. @val is parenthesized so that
 * any expression can be passed; it is re-evaluated on every iteration
 * and therefore must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2114 
/*
 * SHRINK: calls xmlSHRINK(ctxt) when ctxt->progressive is 0, more than
 * 2 * INPUT_CHUNK bytes lie between the input base and the cursor, and
 * fewer than 2 * INPUT_CHUNK bytes remain between the cursor and the end
 * of the input.
 * NOTE: this expands to a bare `if` statement that already carries its
 * terminating ';', so it must not be used directly as the body of an
 * if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2119 
/*
 * xmlSHRINK: shrink the current input of @ctxt and, if the cursor then
 * sits on a 0 byte, request another INPUT_CHUNK of data.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    xmlParserInputShrink(in);
    if (in->cur[0] == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2125 
/*
 * GROW: calls xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between the cursor and the end of the input.
 * NOTE: this expands to a bare `if` statement that already carries its
 * terminating ';', so it must not be used directly as the body of an
 * if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2129 
/*
 * xmlGROW: try to make more data available in the current input of @ctxt.
 *
 * Unless XML_PARSE_HUGE is set, an input that has a buffer whose read
 * callback is not xmlInputReadCallbackNop is refused with a fatal error
 * once the distance from the cursor to either end of the input exceeds
 * XML_MAX_LOOKUP_LIMIT. After growing, the parser is halted if the
 * cursor is found outside of [base, end].
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    ptrdiff_t ahead = in->end - in->cur;
    ptrdiff_t behind = in->cur - in->base;
    int beyondLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                      (behind > XML_MAX_LOOKUP_LIMIT);

    if (beyondLimit &&
        (in->buf) &&
        (in->buf->readcallback != xmlInputReadCallbackNop) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(in, INPUT_CHUNK);

    if ((in->cur > in->end) || (in->cur < in->base)) {
        /*
         * NOTE(review): here the parser is halted before the error is
         * raised, the reverse of the branch above -- presumably on
         * purpose since the cursor is out of range; confirm before
         * reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if ((in->cur != NULL) && (in->cur[0] == 0))
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2153 
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter of `ctxt` by one
 * byte, then ask for more input if the cursor landed on a 0 byte.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): account for the character under the cursor (new line or one
 * more column) and move the cursor forward by @l bytes.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character @v to buffer @b at index @i and
 * advance @i. When @l is 1 the value is stored as a single xmlChar,
 * otherwise it is written through xmlCopyCharMultiByte() and @i is
 * advanced by the value that call returns.
 * Arguments are parenthesized so that expressions can be passed, but @i
 * is expanded more than once and must be a plain lvalue.
 * NOTE: expands to an unbraced if/else; do not use it as the body of
 * another if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2178 
2179 /**
2180  * xmlSkipBlankChars:
2181  * @ctxt:  the XML parser context
2182  *
2183  * skip all blanks character found at that point in the input streams.
2184  * It pops up finished entities in the process if allowable at that point.
2185  *
2186  * Returns the number of space chars skipped
2187  */
2188 
2189 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2190 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191     int res = 0;
2192 
2193     /*
2194      * It's Okay to use CUR/NEXT here since all the blanks are on
2195      * the ASCII range.
2196      */
2197     if (ctxt->instate != XML_PARSER_DTD) {
2198 	const xmlChar *cur;
2199 	/*
2200 	 * if we are in the document content, go really fast
2201 	 */
2202 	cur = ctxt->input->cur;
2203 	while (IS_BLANK_CH(*cur)) {
2204 	    if (*cur == '\n') {
2205 		ctxt->input->line++; ctxt->input->col = 1;
2206 	    } else {
2207 		ctxt->input->col++;
2208 	    }
2209 	    cur++;
2210 	    res++;
2211 	    if (*cur == 0) {
2212 		ctxt->input->cur = cur;
2213 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2214 		cur = ctxt->input->cur;
2215 	    }
2216 	}
2217 	ctxt->input->cur = cur;
2218     } else {
2219         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2220 
2221 	while (1) {
2222             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2223 		NEXT;
2224 	    } else if (CUR == '%') {
2225                 /*
2226                  * Need to handle support of entities branching here
2227                  */
2228 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2229                     break;
2230 	        xmlParsePEReference(ctxt);
2231             } else if (CUR == 0) {
2232                 if (ctxt->inputNr <= 1)
2233                     break;
2234                 xmlPopInput(ctxt);
2235             } else {
2236                 break;
2237             }
2238 
2239             /*
2240              * Also increase the counter when entering or exiting a PERef.
2241              * The spec says: "When a parameter-entity reference is recognized
2242              * in the DTD and included, its replacement text MUST be enlarged
2243              * by the attachment of one leading and one following space (#x20)
2244              * character."
2245              */
2246 	    res++;
2247         }
2248     }
2249     return(res);
2250 }
2251 
2252 /************************************************************************
2253  *									*
2254  *		Commodity functions to handle entities			*
2255  *									*
2256  ************************************************************************/
2257 
2258 /**
2259  * xmlPopInput:
2260  * @ctxt:  an XML parser context
2261  *
2262  * xmlPopInput: the current input pointed by ctxt->input came to an end
2263  *          pop it and return the next char.
2264  *
2265  * Returns the current xmlChar in the parser context
2266  */
2267 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2268 xmlPopInput(xmlParserCtxtPtr ctxt) {
2269     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2270     if (xmlParserDebugEntities)
2271 	xmlGenericError(xmlGenericErrorContext,
2272 		"Popping input %d\n", ctxt->inputNr);
2273     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2274         (ctxt->instate != XML_PARSER_EOF))
2275         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2276                     "Unfinished entity outside the DTD");
2277     xmlFreeInputStream(inputPop(ctxt));
2278     if (*ctxt->input->cur == 0)
2279         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2280     return(CUR);
2281 }
2282 
2283 /**
2284  * xmlPushInput:
2285  * @ctxt:  an XML parser context
2286  * @input:  an XML parser input fragment (entity, XML fragment ...).
2287  *
2288  * xmlPushInput: switch to a new input stream which is stacked on top
2289  *               of the previous one(s).
2290  * Returns -1 in case of error or the index in the input stack
2291  */
2292 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2293 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2294     int ret;
2295     if (input == NULL) return(-1);
2296 
2297     if (xmlParserDebugEntities) {
2298 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2299 	    xmlGenericError(xmlGenericErrorContext,
2300 		    "%s(%d): ", ctxt->input->filename,
2301 		    ctxt->input->line);
2302 	xmlGenericError(xmlGenericErrorContext,
2303 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2304     }
2305     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2306         (ctxt->inputNr > 1024)) {
2307         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2308         while (ctxt->inputNr > 1)
2309             xmlFreeInputStream(inputPop(ctxt));
2310 	return(-1);
2311     }
2312     ret = inputPush(ctxt, input);
2313     if (ctxt->instate == XML_PARSER_EOF)
2314         return(-1);
2315     GROW;
2316     return(ret);
2317 }
2318 
2319 /**
2320  * xmlParseCharRef:
2321  * @ctxt:  an XML parser context
2322  *
2323  * parse Reference declarations
2324  *
2325  * [66] CharRef ::= '&#' [0-9]+ ';' |
2326  *                  '&#x' [0-9a-fA-F]+ ';'
2327  *
2328  * [ WFC: Legal Character ]
2329  * Characters referred to using character references must match the
2330  * production for Char.
2331  *
2332  * Returns the value parsed (as an int), 0 in case of error
2333  */
2334 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2335 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2336     int val = 0;
2337     int count = 0;
2338 
2339     /*
2340      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2341      */
2342     if ((RAW == '&') && (NXT(1) == '#') &&
2343         (NXT(2) == 'x')) {
2344 	SKIP(3);
2345 	GROW;
2346 	while (RAW != ';') { /* loop blocked by count */
2347 	    if (count++ > 20) {
2348 		count = 0;
2349 		GROW;
2350                 if (ctxt->instate == XML_PARSER_EOF)
2351                     return(0);
2352 	    }
2353 	    if ((RAW >= '0') && (RAW <= '9'))
2354 	        val = val * 16 + (CUR - '0');
2355 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2356 	        val = val * 16 + (CUR - 'a') + 10;
2357 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2358 	        val = val * 16 + (CUR - 'A') + 10;
2359 	    else {
2360 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2361 		val = 0;
2362 		break;
2363 	    }
2364 	    if (val > 0x110000)
2365 	        val = 0x110000;
2366 
2367 	    NEXT;
2368 	    count++;
2369 	}
2370 	if (RAW == ';') {
2371 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2372 	    ctxt->input->col++;
2373 	    ctxt->input->cur++;
2374 	}
2375     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2376 	SKIP(2);
2377 	GROW;
2378 	while (RAW != ';') { /* loop blocked by count */
2379 	    if (count++ > 20) {
2380 		count = 0;
2381 		GROW;
2382                 if (ctxt->instate == XML_PARSER_EOF)
2383                     return(0);
2384 	    }
2385 	    if ((RAW >= '0') && (RAW <= '9'))
2386 	        val = val * 10 + (CUR - '0');
2387 	    else {
2388 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2389 		val = 0;
2390 		break;
2391 	    }
2392 	    if (val > 0x110000)
2393 	        val = 0x110000;
2394 
2395 	    NEXT;
2396 	    count++;
2397 	}
2398 	if (RAW == ';') {
2399 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2400 	    ctxt->input->col++;
2401 	    ctxt->input->cur++;
2402 	}
2403     } else {
2404         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2405     }
2406 
2407     /*
2408      * [ WFC: Legal Character ]
2409      * Characters referred to using character references must match the
2410      * production for Char.
2411      */
2412     if (val >= 0x110000) {
2413         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2414                 "xmlParseCharRef: character reference out of bounds\n",
2415 	        val);
2416     } else if (IS_CHAR(val)) {
2417         return(val);
2418     } else {
2419         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2420                           "xmlParseCharRef: invalid xmlChar value %d\n",
2421 	                  val);
2422     }
2423     return(0);
2424 }
2425 
2426 /**
2427  * xmlParseStringCharRef:
2428  * @ctxt:  an XML parser context
2429  * @str:  a pointer to an index in the string
2430  *
2431  * parse Reference declarations, variant parsing from a string rather
2432  * than an an input flow.
2433  *
2434  * [66] CharRef ::= '&#' [0-9]+ ';' |
2435  *                  '&#x' [0-9a-fA-F]+ ';'
2436  *
2437  * [ WFC: Legal Character ]
2438  * Characters referred to using character references must match the
2439  * production for Char.
2440  *
2441  * Returns the value parsed (as an int), 0 in case of error, str will be
2442  *         updated to the current value of the index
2443  */
2444 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2445 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2446     const xmlChar *ptr;
2447     xmlChar cur;
2448     int val = 0;
2449 
2450     if ((str == NULL) || (*str == NULL)) return(0);
2451     ptr = *str;
2452     cur = *ptr;
2453     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2454 	ptr += 3;
2455 	cur = *ptr;
2456 	while (cur != ';') { /* Non input consuming loop */
2457 	    if ((cur >= '0') && (cur <= '9'))
2458 	        val = val * 16 + (cur - '0');
2459 	    else if ((cur >= 'a') && (cur <= 'f'))
2460 	        val = val * 16 + (cur - 'a') + 10;
2461 	    else if ((cur >= 'A') && (cur <= 'F'))
2462 	        val = val * 16 + (cur - 'A') + 10;
2463 	    else {
2464 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2465 		val = 0;
2466 		break;
2467 	    }
2468 	    if (val > 0x110000)
2469 	        val = 0x110000;
2470 
2471 	    ptr++;
2472 	    cur = *ptr;
2473 	}
2474 	if (cur == ';')
2475 	    ptr++;
2476     } else if  ((cur == '&') && (ptr[1] == '#')){
2477 	ptr += 2;
2478 	cur = *ptr;
2479 	while (cur != ';') { /* Non input consuming loops */
2480 	    if ((cur >= '0') && (cur <= '9'))
2481 	        val = val * 10 + (cur - '0');
2482 	    else {
2483 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2484 		val = 0;
2485 		break;
2486 	    }
2487 	    if (val > 0x110000)
2488 	        val = 0x110000;
2489 
2490 	    ptr++;
2491 	    cur = *ptr;
2492 	}
2493 	if (cur == ';')
2494 	    ptr++;
2495     } else {
2496 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2497 	return(0);
2498     }
2499     *str = ptr;
2500 
2501     /*
2502      * [ WFC: Legal Character ]
2503      * Characters referred to using character references must match the
2504      * production for Char.
2505      */
2506     if (val >= 0x110000) {
2507         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2508                 "xmlParseStringCharRef: character reference out of bounds\n",
2509                 val);
2510     } else if (IS_CHAR(val)) {
2511         return(val);
2512     } else {
2513         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2514 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2515 			  val);
2516     }
2517     return(0);
2518 }
2519 
2520 /**
2521  * xmlParserHandlePEReference:
2522  * @ctxt:  the parser context
2523  *
2524  * [69] PEReference ::= '%' Name ';'
2525  *
2526  * [ WFC: No Recursion ]
2527  * A parsed entity must not contain a recursive
2528  * reference to itself, either directly or indirectly.
2529  *
2530  * [ WFC: Entity Declared ]
2531  * In a document without any DTD, a document with only an internal DTD
2532  * subset which contains no parameter entity references, or a document
2533  * with "standalone='yes'", ...  ... The declaration of a parameter
2534  * entity must precede any reference to it...
2535  *
2536  * [ VC: Entity Declared ]
2537  * In a document with an external subset or external parameter entities
2538  * with "standalone='no'", ...  ... The declaration of a parameter entity
2539  * must precede any reference to it...
2540  *
2541  * [ WFC: In DTD ]
2542  * Parameter-entity references may only appear in the DTD.
2543  * NOTE: misleading but this is handled.
2544  *
2545  * A PEReference may have been detected in the current input stream
2546  * the handling is done accordingly to
2547  *      http://www.w3.org/TR/REC-xml#entproc
2548  * i.e.
2549  *   - Included in literal in entity values
2550  *   - Included as Parameter Entity reference within DTDs
2551  */
2552 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2553 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2554     switch(ctxt->instate) {
2555 	case XML_PARSER_CDATA_SECTION:
2556 	    return;
2557         case XML_PARSER_COMMENT:
2558 	    return;
2559 	case XML_PARSER_START_TAG:
2560 	    return;
2561 	case XML_PARSER_END_TAG:
2562 	    return;
2563         case XML_PARSER_EOF:
2564 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2565 	    return;
2566         case XML_PARSER_PROLOG:
2567 	case XML_PARSER_START:
2568 	case XML_PARSER_MISC:
2569 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2570 	    return;
2571 	case XML_PARSER_ENTITY_DECL:
2572         case XML_PARSER_CONTENT:
2573         case XML_PARSER_ATTRIBUTE_VALUE:
2574         case XML_PARSER_PI:
2575 	case XML_PARSER_SYSTEM_LITERAL:
2576 	case XML_PARSER_PUBLIC_LITERAL:
2577 	    /* we just ignore it there */
2578 	    return;
2579         case XML_PARSER_EPILOG:
2580 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2581 	    return;
2582 	case XML_PARSER_ENTITY_VALUE:
2583 	    /*
2584 	     * NOTE: in the case of entity values, we don't do the
2585 	     *       substitution here since we need the literal
2586 	     *       entity value to be able to save the internal
2587 	     *       subset of the document.
2588 	     *       This will be handled by xmlStringDecodeEntities
2589 	     */
2590 	    return;
2591         case XML_PARSER_DTD:
2592 	    /*
2593 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2594 	     * In the internal DTD subset, parameter-entity references
2595 	     * can occur only where markup declarations can occur, not
2596 	     * within markup declarations.
2597 	     * In that case this is handled in xmlParseMarkupDecl
2598 	     */
2599 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2600 		return;
2601 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2602 		return;
2603             break;
2604         case XML_PARSER_IGNORE:
2605             return;
2606     }
2607 
2608     xmlParsePEReference(ctxt);
2609 }
2610 
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n; n is parenthesized so
 * that any expression can be passed.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also reached when the new size computation wraps around. On failure
 * buffer and buffer##_size are left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2625 
2626 /**
2627  * xmlStringLenDecodeEntities:
2628  * @ctxt:  the parser context
2629  * @str:  the input string
2630  * @len: the string length
2631  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2632  * @end:  an end marker xmlChar, 0 if none
2633  * @end2:  an end marker xmlChar, 0 if none
2634  * @end3:  an end marker xmlChar, 0 if none
2635  *
2636  * Takes a entity string content and process to do the adequate substitutions.
2637  *
2638  * [67] Reference ::= EntityRef | CharRef
2639  *
2640  * [69] PEReference ::= '%' Name ';'
2641  *
2642  * Returns A newly allocated string with the substitution done. The caller
2643  *      must deallocate it !
2644  */
2645 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2646 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2647 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2648     xmlChar *buffer = NULL;
2649     size_t buffer_size = 0;
2650     size_t nbchars = 0;
2651 
2652     xmlChar *current = NULL;
2653     xmlChar *rep = NULL;
2654     const xmlChar *last;
2655     xmlEntityPtr ent;
2656     int c,l;
2657 
2658     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2659 	return(NULL);
2660     last = str + len;
2661 
2662     if (((ctxt->depth > 40) &&
2663          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2664 	(ctxt->depth > 1024)) {
2665 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2666 	return(NULL);
2667     }
2668 
2669     /*
2670      * allocate a translation buffer.
2671      */
2672     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2673     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2674     if (buffer == NULL) goto mem_error;
2675 
2676     /*
2677      * OK loop until we reach one of the ending char or a size limit.
2678      * we are operating on already parsed values.
2679      */
2680     if (str < last)
2681 	c = CUR_SCHAR(str, l);
2682     else
2683         c = 0;
2684     while ((c != 0) && (c != end) && /* non input consuming loop */
2685            (c != end2) && (c != end3) &&
2686            (ctxt->instate != XML_PARSER_EOF)) {
2687 
2688 	if (c == 0) break;
2689         if ((c == '&') && (str[1] == '#')) {
2690 	    int val = xmlParseStringCharRef(ctxt, &str);
2691 	    if (val == 0)
2692                 goto int_error;
2693 	    COPY_BUF(0,buffer,nbchars,val);
2694 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2695 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2696 	    }
2697 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2698 	    if (xmlParserDebugEntities)
2699 		xmlGenericError(xmlGenericErrorContext,
2700 			"String decoding Entity Reference: %.30s\n",
2701 			str);
2702 	    ent = xmlParseStringEntityRef(ctxt, &str);
2703 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2704 	    if (ent != NULL)
2705 	        ctxt->nbentities += ent->checked / 2;
2706 	    if ((ent != NULL) &&
2707 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2708 		if (ent->content != NULL) {
2709 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2710 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2711 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2712 		    }
2713 		} else {
2714 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2715 			    "predefined entity has no content\n");
2716                     goto int_error;
2717 		}
2718 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2719 		ctxt->depth++;
2720 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2721 			                      0, 0, 0);
2722 		ctxt->depth--;
2723 		if (rep == NULL) {
2724                     ent->content[0] = 0;
2725                     goto int_error;
2726                 }
2727 
2728                 current = rep;
2729                 while (*current != 0) { /* non input consuming loop */
2730                     buffer[nbchars++] = *current++;
2731                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2732                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2733                             goto int_error;
2734                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2735                     }
2736                 }
2737                 xmlFree(rep);
2738                 rep = NULL;
2739 	    } else if (ent != NULL) {
2740 		int i = xmlStrlen(ent->name);
2741 		const xmlChar *cur = ent->name;
2742 
2743 		buffer[nbchars++] = '&';
2744 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2746 		}
2747 		for (;i > 0;i--)
2748 		    buffer[nbchars++] = *cur++;
2749 		buffer[nbchars++] = ';';
2750 	    }
2751 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2752 	    if (xmlParserDebugEntities)
2753 		xmlGenericError(xmlGenericErrorContext,
2754 			"String decoding PE Reference: %.30s\n", str);
2755 	    ent = xmlParseStringPEReference(ctxt, &str);
2756 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2757 	    if (ent != NULL)
2758 	        ctxt->nbentities += ent->checked / 2;
2759 	    if (ent != NULL) {
2760                 if (ent->content == NULL) {
2761 		    /*
2762 		     * Note: external parsed entities will not be loaded,
2763 		     * it is not required for a non-validating parser to
2764 		     * complete external PEReferences coming from the
2765 		     * internal subset
2766 		     */
2767 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2768 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2769 			(ctxt->validate != 0)) {
2770 			xmlLoadEntityContent(ctxt, ent);
2771 		    } else {
2772 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2773 		  "not validating will not read content for PE entity %s\n",
2774 		                      ent->name, NULL);
2775 		    }
2776 		}
2777 		ctxt->depth++;
2778 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2779 			                      0, 0, 0);
2780 		ctxt->depth--;
2781 		if (rep == NULL) {
2782                     if (ent->content != NULL)
2783                         ent->content[0] = 0;
2784                     goto int_error;
2785                 }
2786                 current = rep;
2787                 while (*current != 0) { /* non input consuming loop */
2788                     buffer[nbchars++] = *current++;
2789                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2791                             goto int_error;
2792                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2793                     }
2794                 }
2795                 xmlFree(rep);
2796                 rep = NULL;
2797 	    }
2798 	} else {
2799 	    COPY_BUF(l,buffer,nbchars,c);
2800 	    str += l;
2801 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2802 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2803 	    }
2804 	}
2805 	if (str < last)
2806 	    c = CUR_SCHAR(str, l);
2807 	else
2808 	    c = 0;
2809     }
2810     buffer[nbchars] = 0;
2811     return(buffer);
2812 
2813 mem_error:
2814     xmlErrMemory(ctxt, NULL);
2815 int_error:
2816     if (rep != NULL)
2817         xmlFree(rep);
2818     if (buffer != NULL)
2819         xmlFree(buffer);
2820     return(NULL);
2821 }
2822 
2823 /**
2824  * xmlStringDecodeEntities:
2825  * @ctxt:  the parser context
2826  * @str:  the input string
2827  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2828  * @end:  an end marker xmlChar, 0 if none
2829  * @end2:  an end marker xmlChar, 0 if none
2830  * @end3:  an end marker xmlChar, 0 if none
2831  *
2832  * Takes a entity string content and process to do the adequate substitutions.
2833  *
2834  * [67] Reference ::= EntityRef | CharRef
2835  *
2836  * [69] PEReference ::= '%' Name ';'
2837  *
2838  * Returns A newly allocated string with the substitution done. The caller
2839  *      must deallocate it !
2840  */
2841 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2842 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2843 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2844     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2845     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2846            end, end2, end3));
2847 }
2848 
2849 /************************************************************************
2850  *									*
2851  *		Commodity functions, cleanup needed ?			*
2852  *									*
2853  ************************************************************************/
2854 
2855 /**
2856  * areBlanks:
2857  * @ctxt:  an XML parser context
2858  * @str:  a xmlChar *
2859  * @len:  the size of @str
2860  * @blank_chars: we know the chars are blanks
2861  *
2862  * Is this a sequence of blank chars that one can ignore ?
2863  *
2864  * Returns 1 if ignorable 0 otherwise.
2865  */
2866 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2867 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2868                      int blank_chars) {
2869     int i, ret;
2870     xmlNodePtr lastChild;
2871 
2872     /*
2873      * Don't spend time trying to differentiate them, the same callback is
2874      * used !
2875      */
2876     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2877 	return(0);
2878 
2879     /*
2880      * Check for xml:space value.
2881      */
2882     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2883         (*(ctxt->space) == -2))
2884 	return(0);
2885 
2886     /*
2887      * Check that the string is made of blanks
2888      */
2889     if (blank_chars == 0) {
2890 	for (i = 0;i < len;i++)
2891 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2892     }
2893 
2894     /*
2895      * Look if the element is mixed content in the DTD if available
2896      */
2897     if (ctxt->node == NULL) return(0);
2898     if (ctxt->myDoc != NULL) {
2899 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2900         if (ret == 0) return(1);
2901         if (ret == 1) return(0);
2902     }
2903 
2904     /*
2905      * Otherwise, heuristic :-\
2906      */
2907     if ((RAW != '<') && (RAW != 0xD)) return(0);
2908     if ((ctxt->node->children == NULL) &&
2909 	(RAW == '<') && (NXT(1) == '/')) return(0);
2910 
2911     lastChild = xmlGetLastChild(ctxt->node);
2912     if (lastChild == NULL) {
2913         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2914             (ctxt->node->content != NULL)) return(0);
2915     } else if (xmlNodeIsText(lastChild))
2916         return(0);
2917     else if ((ctxt->node->children != NULL) &&
2918              (xmlNodeIsText(ctxt->node->children)))
2919         return(0);
2920     return(1);
2921 }
2922 
2923 /************************************************************************
2924  *									*
2925  *		Extra stuff for namespace support			*
2926  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2927  *									*
2928  ************************************************************************/
2929 
2930 /**
2931  * xmlSplitQName:
2932  * @ctxt:  an XML parser context
2933  * @name:  an XML parser context
2934  * @prefix:  a xmlChar **
2935  *
2936  * parse an UTF8 encoded XML qualified name string
2937  *
2938  * [NS 5] QName ::= (Prefix ':')? LocalPart
2939  *
2940  * [NS 6] Prefix ::= NCName
2941  *
2942  * [NS 7] LocalPart ::= NCName
2943  *
2944  * Returns the local part, and prefix is updated
2945  *   to get the Prefix if any.
2946  */
2947 
2948 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2949 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2950     xmlChar buf[XML_MAX_NAMELEN + 5];
2951     xmlChar *buffer = NULL;
2952     int len = 0;
2953     int max = XML_MAX_NAMELEN;
2954     xmlChar *ret = NULL;
2955     const xmlChar *cur = name;
2956     int c;
2957 
2958     if (prefix == NULL) return(NULL);
2959     *prefix = NULL;
2960 
2961     if (cur == NULL) return(NULL);
2962 
2963 #ifndef XML_XML_NAMESPACE
2964     /* xml: prefix is not really a namespace */
2965     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2966         (cur[2] == 'l') && (cur[3] == ':'))
2967 	return(xmlStrdup(name));
2968 #endif
2969 
2970     /* nasty but well=formed */
2971     if (cur[0] == ':')
2972 	return(xmlStrdup(name));
2973 
2974     c = *cur++;
2975     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2976 	buf[len++] = c;
2977 	c = *cur++;
2978     }
2979     if (len >= max) {
2980 	/*
2981 	 * Okay someone managed to make a huge name, so he's ready to pay
2982 	 * for the processing speed.
2983 	 */
2984 	max = len * 2;
2985 
2986 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2987 	if (buffer == NULL) {
2988 	    xmlErrMemory(ctxt, NULL);
2989 	    return(NULL);
2990 	}
2991 	memcpy(buffer, buf, len);
2992 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2993 	    if (len + 10 > max) {
2994 	        xmlChar *tmp;
2995 
2996 		max *= 2;
2997 		tmp = (xmlChar *) xmlRealloc(buffer,
2998 						max * sizeof(xmlChar));
2999 		if (tmp == NULL) {
3000 		    xmlFree(buffer);
3001 		    xmlErrMemory(ctxt, NULL);
3002 		    return(NULL);
3003 		}
3004 		buffer = tmp;
3005 	    }
3006 	    buffer[len++] = c;
3007 	    c = *cur++;
3008 	}
3009 	buffer[len] = 0;
3010     }
3011 
3012     if ((c == ':') && (*cur == 0)) {
3013         if (buffer != NULL)
3014 	    xmlFree(buffer);
3015 	*prefix = NULL;
3016 	return(xmlStrdup(name));
3017     }
3018 
3019     if (buffer == NULL)
3020 	ret = xmlStrndup(buf, len);
3021     else {
3022 	ret = buffer;
3023 	buffer = NULL;
3024 	max = XML_MAX_NAMELEN;
3025     }
3026 
3027 
3028     if (c == ':') {
3029 	c = *cur;
3030         *prefix = ret;
3031 	if (c == 0) {
3032 	    return(xmlStrndup(BAD_CAST "", 0));
3033 	}
3034 	len = 0;
3035 
3036 	/*
3037 	 * Check that the first character is proper to start
3038 	 * a new name
3039 	 */
3040 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3041 	      ((c >= 0x41) && (c <= 0x5A)) ||
3042 	      (c == '_') || (c == ':'))) {
3043 	    int l;
3044 	    int first = CUR_SCHAR(cur, l);
3045 
3046 	    if (!IS_LETTER(first) && (first != '_')) {
3047 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3048 			    "Name %s is not XML Namespace compliant\n",
3049 				  name);
3050 	    }
3051 	}
3052 	cur++;
3053 
3054 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3055 	    buf[len++] = c;
3056 	    c = *cur++;
3057 	}
3058 	if (len >= max) {
3059 	    /*
3060 	     * Okay someone managed to make a huge name, so he's ready to pay
3061 	     * for the processing speed.
3062 	     */
3063 	    max = len * 2;
3064 
3065 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3066 	    if (buffer == NULL) {
3067 	        xmlErrMemory(ctxt, NULL);
3068 		return(NULL);
3069 	    }
3070 	    memcpy(buffer, buf, len);
3071 	    while (c != 0) { /* tested bigname2.xml */
3072 		if (len + 10 > max) {
3073 		    xmlChar *tmp;
3074 
3075 		    max *= 2;
3076 		    tmp = (xmlChar *) xmlRealloc(buffer,
3077 						    max * sizeof(xmlChar));
3078 		    if (tmp == NULL) {
3079 			xmlErrMemory(ctxt, NULL);
3080 			xmlFree(buffer);
3081 			return(NULL);
3082 		    }
3083 		    buffer = tmp;
3084 		}
3085 		buffer[len++] = c;
3086 		c = *cur++;
3087 	    }
3088 	    buffer[len] = 0;
3089 	}
3090 
3091 	if (buffer == NULL)
3092 	    ret = xmlStrndup(buf, len);
3093 	else {
3094 	    ret = buffer;
3095 	}
3096     }
3097 
3098     return(ret);
3099 }
3100 
3101 /************************************************************************
3102  *									*
3103  *			The parser itself				*
3104  *	Relates to http://www.w3.org/TR/REC-xml				*
3105  *									*
3106  ************************************************************************/
3107 
3108 /************************************************************************
3109  *									*
3110  *	Routines to parse Name, NCName and NmToken			*
3111  *									*
3112  ************************************************************************/
/*
 * Call counters for the name parsing routines below; each routine
 * increments its own counter on entry. Only compiled in when DEBUG is
 * defined.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3121 
3122 /*
3123  * The two following functions are related to the change of accepted
3124  * characters for Name and NmToken in the Revision 5 of XML-1.0
3125  * They correspond to the modified production [4] and the new production [4a]
3126  * changes in that revision. Also note that the macros used for the
3127  * productions Letter, Digit, CombiningChar and Extender are not needed
3128  * anymore.
3129  * We still keep compatibility to pre-revision5 parsing semantic if the
3130  * new XML_PARSE_OLD10 option is given to the parser.
3131  */
3132 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3133 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3134     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3135         /*
3136 	 * Use the new checks of production [4] [4a] amd [5] of the
3137 	 * Update 5 of XML-1.0
3138 	 */
3139 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3140 	    (((c >= 'a') && (c <= 'z')) ||
3141 	     ((c >= 'A') && (c <= 'Z')) ||
3142 	     (c == '_') || (c == ':') ||
3143 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3144 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3145 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3146 	     ((c >= 0x370) && (c <= 0x37D)) ||
3147 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3148 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3149 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3150 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3155 	    return(1);
3156     } else {
3157         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3158 	    return(1);
3159     }
3160     return(0);
3161 }
3162 
3163 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3164 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3165     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166         /*
3167 	 * Use the new checks of production [4] [4a] amd [5] of the
3168 	 * Update 5 of XML-1.0
3169 	 */
3170 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171 	    (((c >= 'a') && (c <= 'z')) ||
3172 	     ((c >= 'A') && (c <= 'Z')) ||
3173 	     ((c >= '0') && (c <= '9')) || /* !start */
3174 	     (c == '_') || (c == ':') ||
3175 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3176 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3177 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3178 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3179 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3180 	     ((c >= 0x370) && (c <= 0x37D)) ||
3181 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3182 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3183 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3184 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3185 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3186 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3187 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3188 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3189 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3190 	     return(1);
3191     } else {
3192         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3193             (c == '.') || (c == '-') ||
3194 	    (c == '_') || (c == ':') ||
3195 	    (IS_COMBINING(c)) ||
3196 	    (IS_EXTENDER(c)))
3197 	    return(1);
3198     }
3199     return(0);
3200 }
3201 
/*
 * Forward declaration of a file-local (static) routine; its definition
 * is not part of this section of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3204 
3205 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3206 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3207     int len = 0, l;
3208     int c;
3209     int count = 0;
3210 
3211 #ifdef DEBUG
3212     nbParseNameComplex++;
3213 #endif
3214 
3215     /*
3216      * Handler for more complex cases
3217      */
3218     GROW;
3219     if (ctxt->instate == XML_PARSER_EOF)
3220         return(NULL);
3221     c = CUR_CHAR(l);
3222     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223         /*
3224 	 * Use the new checks of production [4] [4a] amd [5] of the
3225 	 * Update 5 of XML-1.0
3226 	 */
3227 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 	    (!(((c >= 'a') && (c <= 'z')) ||
3229 	       ((c >= 'A') && (c <= 'Z')) ||
3230 	       (c == '_') || (c == ':') ||
3231 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3232 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3233 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 	       ((c >= 0x370) && (c <= 0x37D)) ||
3235 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3237 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3238 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 	    return(NULL);
3244 	}
3245 	len += l;
3246 	NEXTL(l);
3247 	c = CUR_CHAR(l);
3248 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 	       (((c >= 'a') && (c <= 'z')) ||
3250 	        ((c >= 'A') && (c <= 'Z')) ||
3251 	        ((c >= '0') && (c <= '9')) || /* !start */
3252 	        (c == '_') || (c == ':') ||
3253 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3255 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3256 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 	        ((c >= 0x370) && (c <= 0x37D)) ||
3259 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3261 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3263 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3268 		)) {
3269 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3270 		count = 0;
3271 		GROW;
3272                 if (ctxt->instate == XML_PARSER_EOF)
3273                     return(NULL);
3274 	    }
3275 	    len += l;
3276 	    NEXTL(l);
3277 	    c = CUR_CHAR(l);
3278 	}
3279     } else {
3280 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3281 	    (!IS_LETTER(c) && (c != '_') &&
3282 	     (c != ':'))) {
3283 	    return(NULL);
3284 	}
3285 	len += l;
3286 	NEXTL(l);
3287 	c = CUR_CHAR(l);
3288 
3289 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3290 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3291 		(c == '.') || (c == '-') ||
3292 		(c == '_') || (c == ':') ||
3293 		(IS_COMBINING(c)) ||
3294 		(IS_EXTENDER(c)))) {
3295 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3296 		count = 0;
3297 		GROW;
3298                 if (ctxt->instate == XML_PARSER_EOF)
3299                     return(NULL);
3300 	    }
3301 	    len += l;
3302 	    NEXTL(l);
3303 	    c = CUR_CHAR(l);
3304 	}
3305     }
3306     if ((len > XML_MAX_NAME_LENGTH) &&
3307         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3308         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309         return(NULL);
3310     }
3311     if (ctxt->input->cur - ctxt->input->base < len) {
3312         /*
3313          * There were a couple of bugs where PERefs lead to to a change
3314          * of the buffer. Check the buffer size to avoid passing an invalid
3315          * pointer to xmlDictLookup.
3316          */
3317         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318                     "unexpected change of input buffer");
3319         return (NULL);
3320     }
3321     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324 }
3325 
3326 /**
3327  * xmlParseName:
3328  * @ctxt:  an XML parser context
3329  *
3330  * parse an XML name.
3331  *
3332  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3333  *                  CombiningChar | Extender
3334  *
3335  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3336  *
3337  * [6] Names ::= Name (#x20 Name)*
3338  *
3339  * Returns the Name parsed or NULL
3340  */
3341 
3342 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3343 xmlParseName(xmlParserCtxtPtr ctxt) {
3344     const xmlChar *in;
3345     const xmlChar *ret;
3346     int count = 0;
3347 
3348     GROW;
3349 
3350 #ifdef DEBUG
3351     nbParseName++;
3352 #endif
3353 
3354     /*
3355      * Accelerator for simple ASCII names
3356      */
3357     in = ctxt->input->cur;
3358     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 	((*in >= 0x41) && (*in <= 0x5A)) ||
3360 	(*in == '_') || (*in == ':')) {
3361 	in++;
3362 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3365 	       (*in == '_') || (*in == '-') ||
3366 	       (*in == ':') || (*in == '.'))
3367 	    in++;
3368 	if ((*in > 0) && (*in < 0x80)) {
3369 	    count = in - ctxt->input->cur;
3370             if ((count > XML_MAX_NAME_LENGTH) &&
3371                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3372                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3373                 return(NULL);
3374             }
3375 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3376 	    ctxt->input->cur = in;
3377 	    ctxt->input->col += count;
3378 	    if (ret == NULL)
3379 	        xmlErrMemory(ctxt, NULL);
3380 	    return(ret);
3381 	}
3382     }
3383     /* accelerator for special cases */
3384     return(xmlParseNameComplex(ctxt));
3385 }
3386 
3387 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3388 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3389     int len = 0, l;
3390     int c;
3391     int count = 0;
3392     size_t startPosition = 0;
3393 
3394 #ifdef DEBUG
3395     nbParseNCNameComplex++;
3396 #endif
3397 
3398     /*
3399      * Handler for more complex cases
3400      */
3401     GROW;
3402     startPosition = CUR_PTR - BASE_PTR;
3403     c = CUR_CHAR(l);
3404     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406 	return(NULL);
3407     }
3408 
3409     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3411 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3412             if ((len > XML_MAX_NAME_LENGTH) &&
3413                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3414                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3415                 return(NULL);
3416             }
3417 	    count = 0;
3418 	    GROW;
3419             if (ctxt->instate == XML_PARSER_EOF)
3420                 return(NULL);
3421 	}
3422 	len += l;
3423 	NEXTL(l);
3424 	c = CUR_CHAR(l);
3425 	if (c == 0) {
3426 	    count = 0;
3427 	    /*
3428 	     * when shrinking to extend the buffer we really need to preserve
3429 	     * the part of the name we already parsed. Hence rolling back
3430 	     * by current length.
3431 	     */
3432 	    ctxt->input->cur -= l;
3433 	    GROW;
3434             if (ctxt->instate == XML_PARSER_EOF)
3435                 return(NULL);
3436 	    ctxt->input->cur += l;
3437 	    c = CUR_CHAR(l);
3438 	}
3439     }
3440     if ((len > XML_MAX_NAME_LENGTH) &&
3441         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3442         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3443         return(NULL);
3444     }
3445     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3446 }
3447 
3448 /**
3449  * xmlParseNCName:
3450  * @ctxt:  an XML parser context
3451  * @len:  length of the string parsed
3452  *
3453  * parse an XML name.
3454  *
3455  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3456  *                      CombiningChar | Extender
3457  *
3458  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3459  *
3460  * Returns the Name parsed or NULL
3461  */
3462 
3463 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3464 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3465     const xmlChar *in, *e;
3466     const xmlChar *ret;
3467     int count = 0;
3468 
3469 #ifdef DEBUG
3470     nbParseNCName++;
3471 #endif
3472 
3473     /*
3474      * Accelerator for simple ASCII names
3475      */
3476     in = ctxt->input->cur;
3477     e = ctxt->input->end;
3478     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3479 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3480 	 (*in == '_')) && (in < e)) {
3481 	in++;
3482 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3485 	        (*in == '_') || (*in == '-') ||
3486 	        (*in == '.')) && (in < e))
3487 	    in++;
3488 	if (in >= e)
3489 	    goto complex;
3490 	if ((*in > 0) && (*in < 0x80)) {
3491 	    count = in - ctxt->input->cur;
3492             if ((count > XML_MAX_NAME_LENGTH) &&
3493                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3494                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3495                 return(NULL);
3496             }
3497 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3498 	    ctxt->input->cur = in;
3499 	    ctxt->input->col += count;
3500 	    if (ret == NULL) {
3501 	        xmlErrMemory(ctxt, NULL);
3502 	    }
3503 	    return(ret);
3504 	}
3505     }
3506 complex:
3507     return(xmlParseNCNameComplex(ctxt));
3508 }
3509 
3510 /**
3511  * xmlParseNameAndCompare:
3512  * @ctxt:  an XML parser context
3513  *
3514  * parse an XML name and compares for match
3515  * (specialized for endtag parsing)
3516  *
3517  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3518  * and the name for mismatch
3519  */
3520 
3521 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3522 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3523     register const xmlChar *cmp = other;
3524     register const xmlChar *in;
3525     const xmlChar *ret;
3526 
3527     GROW;
3528     if (ctxt->instate == XML_PARSER_EOF)
3529         return(NULL);
3530 
3531     in = ctxt->input->cur;
3532     while (*in != 0 && *in == *cmp) {
3533 	++in;
3534 	++cmp;
3535     }
3536     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3537 	/* success */
3538 	ctxt->input->col += in - ctxt->input->cur;
3539 	ctxt->input->cur = in;
3540 	return (const xmlChar*) 1;
3541     }
3542     /* failure (or end of input buffer), check with full function */
3543     ret = xmlParseName (ctxt);
3544     /* strings coming from the dictionary direct compare possible */
3545     if (ret == other) {
3546 	return (const xmlChar*) 1;
3547     }
3548     return ret;
3549 }
3550 
3551 /**
3552  * xmlParseStringName:
3553  * @ctxt:  an XML parser context
3554  * @str:  a pointer to the string pointer (IN/OUT)
3555  *
3556  * parse an XML name.
3557  *
3558  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3559  *                  CombiningChar | Extender
3560  *
3561  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3562  *
3563  * [6] Names ::= Name (#x20 Name)*
3564  *
3565  * Returns the Name parsed or NULL. The @str pointer
3566  * is updated to the current location in the string.
3567  */
3568 
3569 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3570 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3571     xmlChar buf[XML_MAX_NAMELEN + 5];
3572     const xmlChar *cur = *str;
3573     int len = 0, l;
3574     int c;
3575 
3576 #ifdef DEBUG
3577     nbParseStringName++;
3578 #endif
3579 
3580     c = CUR_SCHAR(cur, l);
3581     if (!xmlIsNameStartChar(ctxt, c)) {
3582 	return(NULL);
3583     }
3584 
3585     COPY_BUF(l,buf,len,c);
3586     cur += l;
3587     c = CUR_SCHAR(cur, l);
3588     while (xmlIsNameChar(ctxt, c)) {
3589 	COPY_BUF(l,buf,len,c);
3590 	cur += l;
3591 	c = CUR_SCHAR(cur, l);
3592 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3593 	    /*
3594 	     * Okay someone managed to make a huge name, so he's ready to pay
3595 	     * for the processing speed.
3596 	     */
3597 	    xmlChar *buffer;
3598 	    int max = len * 2;
3599 
3600 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3601 	    if (buffer == NULL) {
3602 	        xmlErrMemory(ctxt, NULL);
3603 		return(NULL);
3604 	    }
3605 	    memcpy(buffer, buf, len);
3606 	    while (xmlIsNameChar(ctxt, c)) {
3607 		if (len + 10 > max) {
3608 		    xmlChar *tmp;
3609 
3610                     if ((len > XML_MAX_NAME_LENGTH) &&
3611                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3612                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613 			xmlFree(buffer);
3614                         return(NULL);
3615                     }
3616 		    max *= 2;
3617 		    tmp = (xmlChar *) xmlRealloc(buffer,
3618 			                            max * sizeof(xmlChar));
3619 		    if (tmp == NULL) {
3620 			xmlErrMemory(ctxt, NULL);
3621 			xmlFree(buffer);
3622 			return(NULL);
3623 		    }
3624 		    buffer = tmp;
3625 		}
3626 		COPY_BUF(l,buffer,len,c);
3627 		cur += l;
3628 		c = CUR_SCHAR(cur, l);
3629 	    }
3630 	    buffer[len] = 0;
3631 	    *str = cur;
3632 	    return(buffer);
3633 	}
3634     }
3635     if ((len > XML_MAX_NAME_LENGTH) &&
3636         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3637         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638         return(NULL);
3639     }
3640     *str = cur;
3641     return(xmlStrndup(buf, len));
3642 }
3643 
3644 /**
3645  * xmlParseNmtoken:
3646  * @ctxt:  an XML parser context
3647  *
3648  * parse an XML Nmtoken.
3649  *
3650  * [7] Nmtoken ::= (NameChar)+
3651  *
3652  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3653  *
3654  * Returns the Nmtoken parsed or NULL
3655  */
3656 
3657 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3658 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3659     xmlChar buf[XML_MAX_NAMELEN + 5];
3660     int len = 0, l;
3661     int c;
3662     int count = 0;
3663 
3664 #ifdef DEBUG
3665     nbParseNmToken++;
3666 #endif
3667 
3668     GROW;
3669     if (ctxt->instate == XML_PARSER_EOF)
3670         return(NULL);
3671     c = CUR_CHAR(l);
3672 
3673     while (xmlIsNameChar(ctxt, c)) {
3674 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3675 	    count = 0;
3676 	    GROW;
3677 	}
3678 	COPY_BUF(l,buf,len,c);
3679 	NEXTL(l);
3680 	c = CUR_CHAR(l);
3681 	if (c == 0) {
3682 	    count = 0;
3683 	    GROW;
3684 	    if (ctxt->instate == XML_PARSER_EOF)
3685 		return(NULL);
3686             c = CUR_CHAR(l);
3687 	}
3688 	if (len >= XML_MAX_NAMELEN) {
3689 	    /*
3690 	     * Okay someone managed to make a huge token, so he's ready to pay
3691 	     * for the processing speed.
3692 	     */
3693 	    xmlChar *buffer;
3694 	    int max = len * 2;
3695 
3696 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3697 	    if (buffer == NULL) {
3698 	        xmlErrMemory(ctxt, NULL);
3699 		return(NULL);
3700 	    }
3701 	    memcpy(buffer, buf, len);
3702 	    while (xmlIsNameChar(ctxt, c)) {
3703 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3704 		    count = 0;
3705 		    GROW;
3706                     if (ctxt->instate == XML_PARSER_EOF) {
3707                         xmlFree(buffer);
3708                         return(NULL);
3709                     }
3710 		}
3711 		if (len + 10 > max) {
3712 		    xmlChar *tmp;
3713 
3714                     if ((max > XML_MAX_NAME_LENGTH) &&
3715                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3717                         xmlFree(buffer);
3718                         return(NULL);
3719                     }
3720 		    max *= 2;
3721 		    tmp = (xmlChar *) xmlRealloc(buffer,
3722 			                            max * sizeof(xmlChar));
3723 		    if (tmp == NULL) {
3724 			xmlErrMemory(ctxt, NULL);
3725 			xmlFree(buffer);
3726 			return(NULL);
3727 		    }
3728 		    buffer = tmp;
3729 		}
3730 		COPY_BUF(l,buffer,len,c);
3731 		NEXTL(l);
3732 		c = CUR_CHAR(l);
3733 	    }
3734 	    buffer[len] = 0;
3735 	    return(buffer);
3736 	}
3737     }
3738     if (len == 0)
3739         return(NULL);
3740     if ((len > XML_MAX_NAME_LENGTH) &&
3741         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3742         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743         return(NULL);
3744     }
3745     return(xmlStrndup(buf, len));
3746 }
3747 
3748 /**
3749  * xmlParseEntityValue:
3750  * @ctxt:  an XML parser context
3751  * @orig:  if non-NULL store a copy of the original entity value
3752  *
3753  * parse a value for ENTITY declarations
3754  *
3755  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3756  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3757  *
3758  * Returns the EntityValue parsed with reference substituted or NULL
3759  */
3760 
3761 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3762 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3763     xmlChar *buf = NULL;
3764     int len = 0;
3765     int size = XML_PARSER_BUFFER_SIZE;
3766     int c, l;
3767     xmlChar stop;
3768     xmlChar *ret = NULL;
3769     const xmlChar *cur = NULL;
3770     xmlParserInputPtr input;
3771 
3772     if (RAW == '"') stop = '"';
3773     else if (RAW == '\'') stop = '\'';
3774     else {
3775 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3776 	return(NULL);
3777     }
3778     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3779     if (buf == NULL) {
3780 	xmlErrMemory(ctxt, NULL);
3781 	return(NULL);
3782     }
3783 
3784     /*
3785      * The content of the entity definition is copied in a buffer.
3786      */
3787 
3788     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3789     input = ctxt->input;
3790     GROW;
3791     if (ctxt->instate == XML_PARSER_EOF)
3792         goto error;
3793     NEXT;
3794     c = CUR_CHAR(l);
3795     /*
3796      * NOTE: 4.4.5 Included in Literal
3797      * When a parameter entity reference appears in a literal entity
3798      * value, ... a single or double quote character in the replacement
3799      * text is always treated as a normal data character and will not
3800      * terminate the literal.
3801      * In practice it means we stop the loop only when back at parsing
3802      * the initial entity and the quote is found
3803      */
3804     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3805 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3806 	if (len + 5 >= size) {
3807 	    xmlChar *tmp;
3808 
3809 	    size *= 2;
3810 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3811 	    if (tmp == NULL) {
3812 		xmlErrMemory(ctxt, NULL);
3813                 goto error;
3814 	    }
3815 	    buf = tmp;
3816 	}
3817 	COPY_BUF(l,buf,len,c);
3818 	NEXTL(l);
3819 
3820 	GROW;
3821 	c = CUR_CHAR(l);
3822 	if (c == 0) {
3823 	    GROW;
3824 	    c = CUR_CHAR(l);
3825 	}
3826     }
3827     buf[len] = 0;
3828     if (ctxt->instate == XML_PARSER_EOF)
3829         goto error;
3830     if (c != stop) {
3831         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3832         goto error;
3833     }
3834     NEXT;
3835 
3836     /*
3837      * Raise problem w.r.t. '&' and '%' being used in non-entities
3838      * reference constructs. Note Charref will be handled in
3839      * xmlStringDecodeEntities()
3840      */
3841     cur = buf;
3842     while (*cur != 0) { /* non input consuming */
3843 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3844 	    xmlChar *name;
3845 	    xmlChar tmp = *cur;
3846             int nameOk = 0;
3847 
3848 	    cur++;
3849 	    name = xmlParseStringName(ctxt, &cur);
3850             if (name != NULL) {
3851                 nameOk = 1;
3852                 xmlFree(name);
3853             }
3854             if ((nameOk == 0) || (*cur != ';')) {
3855 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3856 	    "EntityValue: '%c' forbidden except for entities references\n",
3857 	                          tmp);
3858                 goto error;
3859 	    }
3860 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3861 		(ctxt->inputNr == 1)) {
3862 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3863                 goto error;
3864 	    }
3865 	    if (*cur == 0)
3866 	        break;
3867 	}
3868 	cur++;
3869     }
3870 
3871     /*
3872      * Then PEReference entities are substituted.
3873      *
3874      * NOTE: 4.4.7 Bypassed
3875      * When a general entity reference appears in the EntityValue in
3876      * an entity declaration, it is bypassed and left as is.
3877      * so XML_SUBSTITUTE_REF is not set here.
3878      */
3879     ++ctxt->depth;
3880     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3881                                   0, 0, 0);
3882     --ctxt->depth;
3883     if (orig != NULL) {
3884         *orig = buf;
3885         buf = NULL;
3886     }
3887 
3888 error:
3889     if (buf != NULL)
3890         xmlFree(buf);
3891     return(ret);
3892 }
3893 
3894 /**
3895  * xmlParseAttValueComplex:
3896  * @ctxt:  an XML parser context
3897  * @len:   the resulting attribute len
3898  * @normalize:  whether to apply the inner normalization
3899  *
3900  * parse a value for an attribute, this is the fallback function
3901  * of xmlParseAttValue() when the attribute parsing requires handling
3902  * of non-ASCII characters, or normalization compaction.
3903  *
3904  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3905  */
3906 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3907 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3908     xmlChar limit = 0;
3909     xmlChar *buf = NULL;
3910     xmlChar *rep = NULL;
3911     size_t len = 0;
3912     size_t buf_size = 0;
3913     int c, l, in_space = 0;
3914     xmlChar *current = NULL;
3915     xmlEntityPtr ent;
3916 
3917     if (NXT(0) == '"') {
3918 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3919 	limit = '"';
3920         NEXT;
3921     } else if (NXT(0) == '\'') {
3922 	limit = '\'';
3923 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3924         NEXT;
3925     } else {
3926 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3927 	return(NULL);
3928     }
3929 
3930     /*
3931      * allocate a translation buffer.
3932      */
3933     buf_size = XML_PARSER_BUFFER_SIZE;
3934     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3935     if (buf == NULL) goto mem_error;
3936 
3937     /*
3938      * OK loop until we reach one of the ending char or a size limit.
3939      */
3940     c = CUR_CHAR(l);
3941     while (((NXT(0) != limit) && /* checked */
3942             (IS_CHAR(c)) && (c != '<')) &&
3943             (ctxt->instate != XML_PARSER_EOF)) {
3944         /*
3945          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3946          * special option is given
3947          */
3948         if ((len > XML_MAX_TEXT_LENGTH) &&
3949             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3950             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3951                            "AttValue length too long\n");
3952             goto mem_error;
3953         }
3954 	if (c == '&') {
3955 	    in_space = 0;
3956 	    if (NXT(1) == '#') {
3957 		int val = xmlParseCharRef(ctxt);
3958 
3959 		if (val == '&') {
3960 		    if (ctxt->replaceEntities) {
3961 			if (len + 10 > buf_size) {
3962 			    growBuffer(buf, 10);
3963 			}
3964 			buf[len++] = '&';
3965 		    } else {
3966 			/*
3967 			 * The reparsing will be done in xmlStringGetNodeList()
3968 			 * called by the attribute() function in SAX.c
3969 			 */
3970 			if (len + 10 > buf_size) {
3971 			    growBuffer(buf, 10);
3972 			}
3973 			buf[len++] = '&';
3974 			buf[len++] = '#';
3975 			buf[len++] = '3';
3976 			buf[len++] = '8';
3977 			buf[len++] = ';';
3978 		    }
3979 		} else if (val != 0) {
3980 		    if (len + 10 > buf_size) {
3981 			growBuffer(buf, 10);
3982 		    }
3983 		    len += xmlCopyChar(0, &buf[len], val);
3984 		}
3985 	    } else {
3986 		ent = xmlParseEntityRef(ctxt);
3987 		ctxt->nbentities++;
3988 		if (ent != NULL)
3989 		    ctxt->nbentities += ent->owner;
3990 		if ((ent != NULL) &&
3991 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3992 		    if (len + 10 > buf_size) {
3993 			growBuffer(buf, 10);
3994 		    }
3995 		    if ((ctxt->replaceEntities == 0) &&
3996 		        (ent->content[0] == '&')) {
3997 			buf[len++] = '&';
3998 			buf[len++] = '#';
3999 			buf[len++] = '3';
4000 			buf[len++] = '8';
4001 			buf[len++] = ';';
4002 		    } else {
4003 			buf[len++] = ent->content[0];
4004 		    }
4005 		} else if ((ent != NULL) &&
4006 		           (ctxt->replaceEntities != 0)) {
4007 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4008 			++ctxt->depth;
4009 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4010 						      XML_SUBSTITUTE_REF,
4011 						      0, 0, 0);
4012 			--ctxt->depth;
4013 			if (rep != NULL) {
4014 			    current = rep;
4015 			    while (*current != 0) { /* non input consuming */
4016                                 if ((*current == 0xD) || (*current == 0xA) ||
4017                                     (*current == 0x9)) {
4018                                     buf[len++] = 0x20;
4019                                     current++;
4020                                 } else
4021                                     buf[len++] = *current++;
4022 				if (len + 10 > buf_size) {
4023 				    growBuffer(buf, 10);
4024 				}
4025 			    }
4026 			    xmlFree(rep);
4027 			    rep = NULL;
4028 			}
4029 		    } else {
4030 			if (len + 10 > buf_size) {
4031 			    growBuffer(buf, 10);
4032 			}
4033 			if (ent->content != NULL)
4034 			    buf[len++] = ent->content[0];
4035 		    }
4036 		} else if (ent != NULL) {
4037 		    int i = xmlStrlen(ent->name);
4038 		    const xmlChar *cur = ent->name;
4039 
4040 		    /*
4041 		     * This may look absurd but is needed to detect
4042 		     * entities problems
4043 		     */
4044 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4045 			(ent->content != NULL) && (ent->checked == 0)) {
4046 			unsigned long oldnbent = ctxt->nbentities, diff;
4047 
4048 			++ctxt->depth;
4049 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4050 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4051 			--ctxt->depth;
4052 
4053                         diff = ctxt->nbentities - oldnbent + 1;
4054                         if (diff > INT_MAX / 2)
4055                             diff = INT_MAX / 2;
4056                         ent->checked = diff * 2;
4057 			if (rep != NULL) {
4058 			    if (xmlStrchr(rep, '<'))
4059 			        ent->checked |= 1;
4060 			    xmlFree(rep);
4061 			    rep = NULL;
4062 			} else {
4063                             ent->content[0] = 0;
4064                         }
4065 		    }
4066 
4067 		    /*
4068 		     * Just output the reference
4069 		     */
4070 		    buf[len++] = '&';
4071 		    while (len + i + 10 > buf_size) {
4072 			growBuffer(buf, i + 10);
4073 		    }
4074 		    for (;i > 0;i--)
4075 			buf[len++] = *cur++;
4076 		    buf[len++] = ';';
4077 		}
4078 	    }
4079 	} else {
4080 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4081 	        if ((len != 0) || (!normalize)) {
4082 		    if ((!normalize) || (!in_space)) {
4083 			COPY_BUF(l,buf,len,0x20);
4084 			while (len + 10 > buf_size) {
4085 			    growBuffer(buf, 10);
4086 			}
4087 		    }
4088 		    in_space = 1;
4089 		}
4090 	    } else {
4091 	        in_space = 0;
4092 		COPY_BUF(l,buf,len,c);
4093 		if (len + 10 > buf_size) {
4094 		    growBuffer(buf, 10);
4095 		}
4096 	    }
4097 	    NEXTL(l);
4098 	}
4099 	GROW;
4100 	c = CUR_CHAR(l);
4101     }
4102     if (ctxt->instate == XML_PARSER_EOF)
4103         goto error;
4104 
4105     if ((in_space) && (normalize)) {
4106         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4107     }
4108     buf[len] = 0;
4109     if (RAW == '<') {
4110 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4111     } else if (RAW != limit) {
4112 	if ((c != 0) && (!IS_CHAR(c))) {
4113 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4114 			   "invalid character in attribute value\n");
4115 	} else {
4116 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4117 			   "AttValue: ' expected\n");
4118         }
4119     } else
4120 	NEXT;
4121 
4122     /*
4123      * There we potentially risk an overflow, don't allow attribute value of
4124      * length more than INT_MAX it is a very reasonable assumption !
4125      */
4126     if (len >= INT_MAX) {
4127         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4128                        "AttValue length too long\n");
4129         goto mem_error;
4130     }
4131 
4132     if (attlen != NULL) *attlen = (int) len;
4133     return(buf);
4134 
4135 mem_error:
4136     xmlErrMemory(ctxt, NULL);
4137 error:
4138     if (buf != NULL)
4139         xmlFree(buf);
4140     if (rep != NULL)
4141         xmlFree(rep);
4142     return(NULL);
4143 }
4144 
4145 /**
4146  * xmlParseAttValue:
4147  * @ctxt:  an XML parser context
4148  *
4149  * parse a value for an attribute
4150  * Note: the parser won't do substitution of entities here, this
4151  * will be handled later in xmlStringGetNodeList
4152  *
4153  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4154  *                   "'" ([^<&'] | Reference)* "'"
4155  *
4156  * 3.3.3 Attribute-Value Normalization:
4157  * Before the value of an attribute is passed to the application or
4158  * checked for validity, the XML processor must normalize it as follows:
4159  * - a character reference is processed by appending the referenced
4160  *   character to the attribute value
4161  * - an entity reference is processed by recursively processing the
4162  *   replacement text of the entity
4163  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4164  *   appending #x20 to the normalized value, except that only a single
4165  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4166  *   parsed entity or the literal entity value of an internal parsed entity
4167  * - other characters are processed by appending them to the normalized value
4168  * If the declared value is not CDATA, then the XML processor must further
4169  * process the normalized attribute value by discarding any leading and
4170  * trailing space (#x20) characters, and by replacing sequences of space
4171  * (#x20) characters by a single space (#x20) character.
4172  * All attributes for which no declaration has been read should be treated
4173  * by a non-validating parser as if declared CDATA.
4174  *
4175  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4176  */
4177 
4178 
4179 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4180 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4181     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4182     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4183 }
4184 
4185 /**
4186  * xmlParseSystemLiteral:
4187  * @ctxt:  an XML parser context
4188  *
4189  * parse an XML Literal
4190  *
4191  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4192  *
4193  * Returns the SystemLiteral parsed or NULL
4194  */
4195 
4196 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4197 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4198     xmlChar *buf = NULL;
4199     int len = 0;
4200     int size = XML_PARSER_BUFFER_SIZE;
4201     int cur, l;
4202     xmlChar stop;
4203     int state = ctxt->instate;
4204     int count = 0;
4205 
4206     SHRINK;
4207     if (RAW == '"') {
4208         NEXT;
4209 	stop = '"';
4210     } else if (RAW == '\'') {
4211         NEXT;
4212 	stop = '\'';
4213     } else {
4214 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4215 	return(NULL);
4216     }
4217 
4218     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4219     if (buf == NULL) {
4220         xmlErrMemory(ctxt, NULL);
4221 	return(NULL);
4222     }
4223     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4224     cur = CUR_CHAR(l);
4225     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4226 	if (len + 5 >= size) {
4227 	    xmlChar *tmp;
4228 
4229             if ((size > XML_MAX_NAME_LENGTH) &&
4230                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4231                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4232                 xmlFree(buf);
4233 		ctxt->instate = (xmlParserInputState) state;
4234                 return(NULL);
4235             }
4236 	    size *= 2;
4237 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4238 	    if (tmp == NULL) {
4239 	        xmlFree(buf);
4240 		xmlErrMemory(ctxt, NULL);
4241 		ctxt->instate = (xmlParserInputState) state;
4242 		return(NULL);
4243 	    }
4244 	    buf = tmp;
4245 	}
4246 	count++;
4247 	if (count > 50) {
4248 	    SHRINK;
4249 	    GROW;
4250 	    count = 0;
4251             if (ctxt->instate == XML_PARSER_EOF) {
4252 	        xmlFree(buf);
4253 		return(NULL);
4254             }
4255 	}
4256 	COPY_BUF(l,buf,len,cur);
4257 	NEXTL(l);
4258 	cur = CUR_CHAR(l);
4259 	if (cur == 0) {
4260 	    GROW;
4261 	    SHRINK;
4262 	    cur = CUR_CHAR(l);
4263 	}
4264     }
4265     buf[len] = 0;
4266     ctxt->instate = (xmlParserInputState) state;
4267     if (!IS_CHAR(cur)) {
4268 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4269     } else {
4270 	NEXT;
4271     }
4272     return(buf);
4273 }
4274 
4275 /**
4276  * xmlParsePubidLiteral:
4277  * @ctxt:  an XML parser context
4278  *
4279  * parse an XML public literal
4280  *
4281  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4282  *
4283  * Returns the PubidLiteral parsed or NULL.
4284  */
4285 
4286 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4287 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4288     xmlChar *buf = NULL;
4289     int len = 0;
4290     int size = XML_PARSER_BUFFER_SIZE;
4291     xmlChar cur;
4292     xmlChar stop;
4293     int count = 0;
4294     xmlParserInputState oldstate = ctxt->instate;
4295 
4296     SHRINK;
4297     if (RAW == '"') {
4298         NEXT;
4299 	stop = '"';
4300     } else if (RAW == '\'') {
4301         NEXT;
4302 	stop = '\'';
4303     } else {
4304 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4305 	return(NULL);
4306     }
4307     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4308     if (buf == NULL) {
4309 	xmlErrMemory(ctxt, NULL);
4310 	return(NULL);
4311     }
4312     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4313     cur = CUR;
4314     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4315 	if (len + 1 >= size) {
4316 	    xmlChar *tmp;
4317 
4318             if ((size > XML_MAX_NAME_LENGTH) &&
4319                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321                 xmlFree(buf);
4322                 return(NULL);
4323             }
4324 	    size *= 2;
4325 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326 	    if (tmp == NULL) {
4327 		xmlErrMemory(ctxt, NULL);
4328 		xmlFree(buf);
4329 		return(NULL);
4330 	    }
4331 	    buf = tmp;
4332 	}
4333 	buf[len++] = cur;
4334 	count++;
4335 	if (count > 50) {
4336 	    SHRINK;
4337 	    GROW;
4338 	    count = 0;
4339             if (ctxt->instate == XML_PARSER_EOF) {
4340 		xmlFree(buf);
4341 		return(NULL);
4342             }
4343 	}
4344 	NEXT;
4345 	cur = CUR;
4346 	if (cur == 0) {
4347 	    GROW;
4348 	    SHRINK;
4349 	    cur = CUR;
4350 	}
4351     }
4352     buf[len] = 0;
4353     if (cur != stop) {
4354 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4355     } else {
4356 	NEXT;
4357     }
4358     ctxt->instate = oldstate;
4359     return(buf);
4360 }
4361 
4362 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4363 
/*
 * Lookup table used by the inner scanning loop of xmlParseCharData(),
 * which keeps advancing while the entry for the current byte is non-zero.
 * Non-zero entries hold their own index: TAB (0x09) and 0x20-0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D). Everything else maps to 0: the
 * other control characters (CR and LF included), those three characters
 * and all bytes from 0x80 up (non-ascii).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4401 
4402 /**
4403  * xmlParseCharData:
4404  * @ctxt:  an XML parser context
4405  * @cdata:  int indicating whether we are within a CDATA section
4406  *
4407  * parse a CharData section.
4408  * if we are within a CDATA section ']]>' marks an end of section.
4409  *
4410  * The right angle bracket (>) may be represented using the string "&gt;",
4411  * and must, for compatibility, be escaped using "&gt;" or a character
4412  * reference when it appears in the string "]]>" in content, when that
4413  * string is not marking the end of a CDATA section.
4414  *
4415  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4416  */
4417 
4418 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4419 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4420     const xmlChar *in;
4421     int nbchar = 0;
4422     int line = ctxt->input->line;
4423     int col = ctxt->input->col;
4424     int ccol;
4425 
4426     SHRINK;
4427     GROW;
4428     /*
4429      * Accelerated common case where input don't need to be
4430      * modified before passing it to the handler.
4431      */
4432     if (!cdata) {
4433 	in = ctxt->input->cur;
4434 	do {
4435 get_more_space:
4436 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4437 	    if (*in == 0xA) {
4438 		do {
4439 		    ctxt->input->line++; ctxt->input->col = 1;
4440 		    in++;
4441 		} while (*in == 0xA);
4442 		goto get_more_space;
4443 	    }
4444 	    if (*in == '<') {
4445 		nbchar = in - ctxt->input->cur;
4446 		if (nbchar > 0) {
4447 		    const xmlChar *tmp = ctxt->input->cur;
4448 		    ctxt->input->cur = in;
4449 
4450 		    if ((ctxt->sax != NULL) &&
4451 		        (ctxt->sax->ignorableWhitespace !=
4452 		         ctxt->sax->characters)) {
4453 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4454 			    if (ctxt->sax->ignorableWhitespace != NULL)
4455 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4456 						       tmp, nbchar);
4457 			} else {
4458 			    if (ctxt->sax->characters != NULL)
4459 				ctxt->sax->characters(ctxt->userData,
4460 						      tmp, nbchar);
4461 			    if (*ctxt->space == -1)
4462 			        *ctxt->space = -2;
4463 			}
4464 		    } else if ((ctxt->sax != NULL) &&
4465 		               (ctxt->sax->characters != NULL)) {
4466 			ctxt->sax->characters(ctxt->userData,
4467 					      tmp, nbchar);
4468 		    }
4469 		}
4470 		return;
4471 	    }
4472 
4473 get_more:
4474             ccol = ctxt->input->col;
4475 	    while (test_char_data[*in]) {
4476 		in++;
4477 		ccol++;
4478 	    }
4479 	    ctxt->input->col = ccol;
4480 	    if (*in == 0xA) {
4481 		do {
4482 		    ctxt->input->line++; ctxt->input->col = 1;
4483 		    in++;
4484 		} while (*in == 0xA);
4485 		goto get_more;
4486 	    }
4487 	    if (*in == ']') {
4488 		if ((in[1] == ']') && (in[2] == '>')) {
4489 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4490 		    ctxt->input->cur = in + 1;
4491 		    return;
4492 		}
4493 		in++;
4494 		ctxt->input->col++;
4495 		goto get_more;
4496 	    }
4497 	    nbchar = in - ctxt->input->cur;
4498 	    if (nbchar > 0) {
4499 		if ((ctxt->sax != NULL) &&
4500 		    (ctxt->sax->ignorableWhitespace !=
4501 		     ctxt->sax->characters) &&
4502 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4503 		    const xmlChar *tmp = ctxt->input->cur;
4504 		    ctxt->input->cur = in;
4505 
4506 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4507 		        if (ctxt->sax->ignorableWhitespace != NULL)
4508 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4509 							   tmp, nbchar);
4510 		    } else {
4511 		        if (ctxt->sax->characters != NULL)
4512 			    ctxt->sax->characters(ctxt->userData,
4513 						  tmp, nbchar);
4514 			if (*ctxt->space == -1)
4515 			    *ctxt->space = -2;
4516 		    }
4517                     line = ctxt->input->line;
4518                     col = ctxt->input->col;
4519 		} else if (ctxt->sax != NULL) {
4520 		    if (ctxt->sax->characters != NULL)
4521 			ctxt->sax->characters(ctxt->userData,
4522 					      ctxt->input->cur, nbchar);
4523                     line = ctxt->input->line;
4524                     col = ctxt->input->col;
4525 		}
4526                 /* something really bad happened in the SAX callback */
4527                 if (ctxt->instate != XML_PARSER_CONTENT)
4528                     return;
4529 	    }
4530 	    ctxt->input->cur = in;
4531 	    if (*in == 0xD) {
4532 		in++;
4533 		if (*in == 0xA) {
4534 		    ctxt->input->cur = in;
4535 		    in++;
4536 		    ctxt->input->line++; ctxt->input->col = 1;
4537 		    continue; /* while */
4538 		}
4539 		in--;
4540 	    }
4541 	    if (*in == '<') {
4542 		return;
4543 	    }
4544 	    if (*in == '&') {
4545 		return;
4546 	    }
4547 	    SHRINK;
4548 	    GROW;
4549             if (ctxt->instate == XML_PARSER_EOF)
4550 		return;
4551 	    in = ctxt->input->cur;
4552 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4553 	nbchar = 0;
4554     }
4555     ctxt->input->line = line;
4556     ctxt->input->col = col;
4557     xmlParseCharDataComplex(ctxt, cdata);
4558 }
4559 
4560 /**
4561  * xmlParseCharDataComplex:
4562  * @ctxt:  an XML parser context
4563  * @cdata:  int indicating whether we are within a CDATA section
4564  *
4565  * parse a CharData section.this is the fallback function
4566  * of xmlParseCharData() when the parsing requires handling
4567  * of non-ASCII characters.
4568  */
4569 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4570 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4571     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4572     int nbchar = 0;
4573     int cur, l;
4574     int count = 0;
4575 
4576     SHRINK;
4577     GROW;
4578     cur = CUR_CHAR(l);
4579     while ((cur != '<') && /* checked */
4580            (cur != '&') &&
4581 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4582 	if ((cur == ']') && (NXT(1) == ']') &&
4583 	    (NXT(2) == '>')) {
4584 	    if (cdata) break;
4585 	    else {
4586 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4587 	    }
4588 	}
4589 	COPY_BUF(l,buf,nbchar,cur);
4590 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4591 	    buf[nbchar] = 0;
4592 
4593 	    /*
4594 	     * OK the segment is to be consumed as chars.
4595 	     */
4596 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4597 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4598 		    if (ctxt->sax->ignorableWhitespace != NULL)
4599 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4600 			                               buf, nbchar);
4601 		} else {
4602 		    if (ctxt->sax->characters != NULL)
4603 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4604 		    if ((ctxt->sax->characters !=
4605 		         ctxt->sax->ignorableWhitespace) &&
4606 			(*ctxt->space == -1))
4607 			*ctxt->space = -2;
4608 		}
4609 	    }
4610 	    nbchar = 0;
4611             /* something really bad happened in the SAX callback */
4612             if (ctxt->instate != XML_PARSER_CONTENT)
4613                 return;
4614 	}
4615 	count++;
4616 	if (count > 50) {
4617 	    SHRINK;
4618 	    GROW;
4619 	    count = 0;
4620             if (ctxt->instate == XML_PARSER_EOF)
4621 		return;
4622 	}
4623 	NEXTL(l);
4624 	cur = CUR_CHAR(l);
4625     }
4626     if (nbchar != 0) {
4627         buf[nbchar] = 0;
4628 	/*
4629 	 * OK the segment is to be consumed as chars.
4630 	 */
4631 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4632 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4633 		if (ctxt->sax->ignorableWhitespace != NULL)
4634 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4635 	    } else {
4636 		if (ctxt->sax->characters != NULL)
4637 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4638 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4639 		    (*ctxt->space == -1))
4640 		    *ctxt->space = -2;
4641 	    }
4642 	}
4643     }
4644     if ((cur != 0) && (!IS_CHAR(cur))) {
4645 	/* Generate the error and skip the offending character */
4646         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4647                           "PCDATA invalid Char value %d\n",
4648 	                  cur);
4649 	NEXTL(l);
4650     }
4651 }
4652 
4653 /**
4654  * xmlParseExternalID:
4655  * @ctxt:  an XML parser context
4656  * @publicID:  a xmlChar** receiving PubidLiteral
4657  * @strict: indicate whether we should restrict parsing to only
4658  *          production [75], see NOTE below
4659  *
4660  * Parse an External ID or a Public ID
4661  *
4662  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4663  *       'PUBLIC' S PubidLiteral S SystemLiteral
4664  *
4665  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4666  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4667  *
4668  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4669  *
4670  * Returns the function returns SystemLiteral and in the second
4671  *                case publicID receives PubidLiteral, is strict is off
4672  *                it is possible to return NULL and have publicID set.
4673  */
4674 
4675 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4676 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4677     xmlChar *URI = NULL;
4678 
4679     SHRINK;
4680 
4681     *publicID = NULL;
4682     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4683         SKIP(6);
4684 	if (SKIP_BLANKS == 0) {
4685 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4686 	                   "Space required after 'SYSTEM'\n");
4687 	}
4688 	URI = xmlParseSystemLiteral(ctxt);
4689 	if (URI == NULL) {
4690 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4691         }
4692     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4693         SKIP(6);
4694 	if (SKIP_BLANKS == 0) {
4695 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4696 		    "Space required after 'PUBLIC'\n");
4697 	}
4698 	*publicID = xmlParsePubidLiteral(ctxt);
4699 	if (*publicID == NULL) {
4700 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4701 	}
4702 	if (strict) {
4703 	    /*
4704 	     * We don't handle [83] so "S SystemLiteral" is required.
4705 	     */
4706 	    if (SKIP_BLANKS == 0) {
4707 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4708 			"Space required after the Public Identifier\n");
4709 	    }
4710 	} else {
4711 	    /*
4712 	     * We handle [83] so we return immediately, if
4713 	     * "S SystemLiteral" is not detected. We skip blanks if no
4714              * system literal was found, but this is harmless since we must
4715              * be at the end of a NotationDecl.
4716 	     */
4717 	    if (SKIP_BLANKS == 0) return(NULL);
4718 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4719 	}
4720 	URI = xmlParseSystemLiteral(ctxt);
4721 	if (URI == NULL) {
4722 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4723         }
4724     }
4725     return(URI);
4726 }
4727 
4728 /**
4729  * xmlParseCommentComplex:
4730  * @ctxt:  an XML parser context
4731  * @buf:  the already parsed part of the buffer
4732  * @len:  number of bytes in the buffer
4733  * @size:  allocated size of the buffer
4734  *
4735  * Skip an XML (SGML) comment <!-- .... -->
4736  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4737  *  must not occur within comments. "
4738  * This is the slow routine in case the accelerator for ascii didn't work
4739  *
4740  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4741  */
4742 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4743 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4744                        size_t len, size_t size) {
4745     int q, ql;
4746     int r, rl;
4747     int cur, l;
4748     size_t count = 0;
4749     int inputid;
4750 
4751     inputid = ctxt->input->id;
4752 
4753     if (buf == NULL) {
4754         len = 0;
4755 	size = XML_PARSER_BUFFER_SIZE;
4756 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4757 	if (buf == NULL) {
4758 	    xmlErrMemory(ctxt, NULL);
4759 	    return;
4760 	}
4761     }
4762     GROW;	/* Assure there's enough input data */
4763     q = CUR_CHAR(ql);
4764     if (q == 0)
4765         goto not_terminated;
4766     if (!IS_CHAR(q)) {
4767         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4768                           "xmlParseComment: invalid xmlChar value %d\n",
4769 	                  q);
4770 	xmlFree (buf);
4771 	return;
4772     }
4773     NEXTL(ql);
4774     r = CUR_CHAR(rl);
4775     if (r == 0)
4776         goto not_terminated;
4777     if (!IS_CHAR(r)) {
4778         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779                           "xmlParseComment: invalid xmlChar value %d\n",
4780 	                  q);
4781 	xmlFree (buf);
4782 	return;
4783     }
4784     NEXTL(rl);
4785     cur = CUR_CHAR(l);
4786     if (cur == 0)
4787         goto not_terminated;
4788     while (IS_CHAR(cur) && /* checked */
4789            ((cur != '>') ||
4790 	    (r != '-') || (q != '-'))) {
4791 	if ((r == '-') && (q == '-')) {
4792 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4793 	}
4794         if ((len > XML_MAX_TEXT_LENGTH) &&
4795             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4796             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797                          "Comment too big found", NULL);
4798             xmlFree (buf);
4799             return;
4800         }
4801 	if (len + 5 >= size) {
4802 	    xmlChar *new_buf;
4803             size_t new_size;
4804 
4805 	    new_size = size * 2;
4806 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4807 	    if (new_buf == NULL) {
4808 		xmlFree (buf);
4809 		xmlErrMemory(ctxt, NULL);
4810 		return;
4811 	    }
4812 	    buf = new_buf;
4813             size = new_size;
4814 	}
4815 	COPY_BUF(ql,buf,len,q);
4816 	q = r;
4817 	ql = rl;
4818 	r = cur;
4819 	rl = l;
4820 
4821 	count++;
4822 	if (count > 50) {
4823 	    SHRINK;
4824 	    GROW;
4825 	    count = 0;
4826             if (ctxt->instate == XML_PARSER_EOF) {
4827 		xmlFree(buf);
4828 		return;
4829             }
4830 	}
4831 	NEXTL(l);
4832 	cur = CUR_CHAR(l);
4833 	if (cur == 0) {
4834 	    SHRINK;
4835 	    GROW;
4836 	    cur = CUR_CHAR(l);
4837 	}
4838     }
4839     buf[len] = 0;
4840     if (cur == 0) {
4841 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4842 	                     "Comment not terminated \n<!--%.50s\n", buf);
4843     } else if (!IS_CHAR(cur)) {
4844         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845                           "xmlParseComment: invalid xmlChar value %d\n",
4846 	                  cur);
4847     } else {
4848 	if (inputid != ctxt->input->id) {
4849 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4850 		           "Comment doesn't start and stop in the same"
4851                            " entity\n");
4852 	}
4853         NEXT;
4854 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855 	    (!ctxt->disableSAX))
4856 	    ctxt->sax->comment(ctxt->userData, buf);
4857     }
4858     xmlFree(buf);
4859     return;
4860 not_terminated:
4861     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 			 "Comment not terminated\n", NULL);
4863     xmlFree(buf);
4864     return;
4865 }
4866 
4867 /**
4868  * xmlParseComment:
4869  * @ctxt:  an XML parser context
4870  *
4871  * Skip an XML (SGML) comment <!-- .... -->
4872  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4873  *  must not occur within comments. "
4874  *
4875  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4876  */
4877 void
xmlParseComment(xmlParserCtxtPtr ctxt)4878 xmlParseComment(xmlParserCtxtPtr ctxt) {
4879     xmlChar *buf = NULL;
4880     size_t size = XML_PARSER_BUFFER_SIZE;
4881     size_t len = 0;
4882     xmlParserInputState state;
4883     const xmlChar *in;
4884     size_t nbchar = 0;
4885     int ccol;
4886     int inputid;
4887 
4888     /*
4889      * Check that there is a comment right here.
4890      */
4891     if ((RAW != '<') || (NXT(1) != '!') ||
4892         (NXT(2) != '-') || (NXT(3) != '-')) return;
4893     state = ctxt->instate;
4894     ctxt->instate = XML_PARSER_COMMENT;
4895     inputid = ctxt->input->id;
4896     SKIP(4);
4897     SHRINK;
4898     GROW;
4899 
4900     /*
4901      * Accelerated common case where input don't need to be
4902      * modified before passing it to the handler.
4903      */
4904     in = ctxt->input->cur;
4905     do {
4906 	if (*in == 0xA) {
4907 	    do {
4908 		ctxt->input->line++; ctxt->input->col = 1;
4909 		in++;
4910 	    } while (*in == 0xA);
4911 	}
4912 get_more:
4913         ccol = ctxt->input->col;
4914 	while (((*in > '-') && (*in <= 0x7F)) ||
4915 	       ((*in >= 0x20) && (*in < '-')) ||
4916 	       (*in == 0x09)) {
4917 		    in++;
4918 		    ccol++;
4919 	}
4920 	ctxt->input->col = ccol;
4921 	if (*in == 0xA) {
4922 	    do {
4923 		ctxt->input->line++; ctxt->input->col = 1;
4924 		in++;
4925 	    } while (*in == 0xA);
4926 	    goto get_more;
4927 	}
4928 	nbchar = in - ctxt->input->cur;
4929 	/*
4930 	 * save current set of data
4931 	 */
4932 	if (nbchar > 0) {
4933 	    if ((ctxt->sax != NULL) &&
4934 		(ctxt->sax->comment != NULL)) {
4935 		if (buf == NULL) {
4936 		    if ((*in == '-') && (in[1] == '-'))
4937 		        size = nbchar + 1;
4938 		    else
4939 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4940 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4941 		    if (buf == NULL) {
4942 		        xmlErrMemory(ctxt, NULL);
4943 			ctxt->instate = state;
4944 			return;
4945 		    }
4946 		    len = 0;
4947 		} else if (len + nbchar + 1 >= size) {
4948 		    xmlChar *new_buf;
4949 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950 		    new_buf = (xmlChar *) xmlRealloc(buf,
4951 		                                     size * sizeof(xmlChar));
4952 		    if (new_buf == NULL) {
4953 		        xmlFree (buf);
4954 			xmlErrMemory(ctxt, NULL);
4955 			ctxt->instate = state;
4956 			return;
4957 		    }
4958 		    buf = new_buf;
4959 		}
4960 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4961 		len += nbchar;
4962 		buf[len] = 0;
4963 	    }
4964 	}
4965         if ((len > XML_MAX_TEXT_LENGTH) &&
4966             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968                          "Comment too big found", NULL);
4969             xmlFree (buf);
4970             return;
4971         }
4972 	ctxt->input->cur = in;
4973 	if (*in == 0xA) {
4974 	    in++;
4975 	    ctxt->input->line++; ctxt->input->col = 1;
4976 	}
4977 	if (*in == 0xD) {
4978 	    in++;
4979 	    if (*in == 0xA) {
4980 		ctxt->input->cur = in;
4981 		in++;
4982 		ctxt->input->line++; ctxt->input->col = 1;
4983 		continue; /* while */
4984 	    }
4985 	    in--;
4986 	}
4987 	SHRINK;
4988 	GROW;
4989         if (ctxt->instate == XML_PARSER_EOF) {
4990             xmlFree(buf);
4991             return;
4992         }
4993 	in = ctxt->input->cur;
4994 	if (*in == '-') {
4995 	    if (in[1] == '-') {
4996 	        if (in[2] == '>') {
4997 		    if (ctxt->input->id != inputid) {
4998 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999 			               "comment doesn't start and stop in the"
5000                                        " same entity\n");
5001 		    }
5002 		    SKIP(3);
5003 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5004 		        (!ctxt->disableSAX)) {
5005 			if (buf != NULL)
5006 			    ctxt->sax->comment(ctxt->userData, buf);
5007 			else
5008 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5009 		    }
5010 		    if (buf != NULL)
5011 		        xmlFree(buf);
5012 		    if (ctxt->instate != XML_PARSER_EOF)
5013 			ctxt->instate = state;
5014 		    return;
5015 		}
5016 		if (buf != NULL) {
5017 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5018 		                      "Double hyphen within comment: "
5019                                       "<!--%.50s\n",
5020 				      buf);
5021 		} else
5022 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5023 		                      "Double hyphen within comment\n", NULL);
5024                 if (ctxt->instate == XML_PARSER_EOF) {
5025                     xmlFree(buf);
5026                     return;
5027                 }
5028 		in++;
5029 		ctxt->input->col++;
5030 	    }
5031 	    in++;
5032 	    ctxt->input->col++;
5033 	    goto get_more;
5034 	}
5035     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5036     xmlParseCommentComplex(ctxt, buf, len, size);
5037     ctxt->instate = state;
5038     return;
5039 }
5040 
5041 
5042 /**
5043  * xmlParsePITarget:
5044  * @ctxt:  an XML parser context
5045  *
5046  * parse the name of a PI
5047  *
5048  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5049  *
5050  * Returns the PITarget name or NULL
5051  */
5052 
5053 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5054 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5055     const xmlChar *name;
5056 
5057     name = xmlParseName(ctxt);
5058     if ((name != NULL) &&
5059         ((name[0] == 'x') || (name[0] == 'X')) &&
5060         ((name[1] == 'm') || (name[1] == 'M')) &&
5061         ((name[2] == 'l') || (name[2] == 'L'))) {
5062 	int i;
5063 	if ((name[0] == 'x') && (name[1] == 'm') &&
5064 	    (name[2] == 'l') && (name[3] == 0)) {
5065 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5066 		 "XML declaration allowed only at the start of the document\n");
5067 	    return(name);
5068 	} else if (name[3] == 0) {
5069 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5070 	    return(name);
5071 	}
5072 	for (i = 0;;i++) {
5073 	    if (xmlW3CPIs[i] == NULL) break;
5074 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5075 	        return(name);
5076 	}
5077 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5079 		      NULL, NULL);
5080     }
5081     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5082 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5083 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5084     }
5085     return(name);
5086 }
5087 
5088 #ifdef LIBXML_CATALOG_ENABLED
5089 /**
5090  * xmlParseCatalogPI:
5091  * @ctxt:  an XML parser context
5092  * @catalog:  the PI value string
5093  *
5094  * parse an XML Catalog Processing Instruction.
5095  *
5096  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5097  *
5098  * Occurs only if allowed by the user and if happening in the Misc
5099  * part of the document before any doctype information
5100  * This will add the given catalog to the parsing context in order
5101  * to be used if there is a resolution need further down in the document
5102  */
5103 
5104 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5105 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5106     xmlChar *URL = NULL;
5107     const xmlChar *tmp, *base;
5108     xmlChar marker;
5109 
5110     tmp = catalog;
5111     while (IS_BLANK_CH(*tmp)) tmp++;
5112     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5113 	goto error;
5114     tmp += 7;
5115     while (IS_BLANK_CH(*tmp)) tmp++;
5116     if (*tmp != '=') {
5117 	return;
5118     }
5119     tmp++;
5120     while (IS_BLANK_CH(*tmp)) tmp++;
5121     marker = *tmp;
5122     if ((marker != '\'') && (marker != '"'))
5123 	goto error;
5124     tmp++;
5125     base = tmp;
5126     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5127     if (*tmp == 0)
5128 	goto error;
5129     URL = xmlStrndup(base, tmp - base);
5130     tmp++;
5131     while (IS_BLANK_CH(*tmp)) tmp++;
5132     if (*tmp != 0)
5133 	goto error;
5134 
5135     if (URL != NULL) {
5136 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5137 	xmlFree(URL);
5138     }
5139     return;
5140 
5141 error:
5142     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5143 	          "Catalog PI syntax error: %s\n",
5144 		  catalog, NULL);
5145     if (URL != NULL)
5146 	xmlFree(URL);
5147 }
5148 #endif
5149 
5150 /**
5151  * xmlParsePI:
5152  * @ctxt:  an XML parser context
5153  *
5154  * parse an XML Processing Instruction.
5155  *
5156  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5157  *
5158  * The processing is transferred to SAX once parsed.
5159  */
5160 
5161 void
xmlParsePI(xmlParserCtxtPtr ctxt)5162 xmlParsePI(xmlParserCtxtPtr ctxt) {
5163     xmlChar *buf = NULL;
5164     size_t len = 0;
5165     size_t size = XML_PARSER_BUFFER_SIZE;
5166     int cur, l;
5167     const xmlChar *target;
5168     xmlParserInputState state;
5169     int count = 0;
5170 
5171     if ((RAW == '<') && (NXT(1) == '?')) {
5172 	int inputid = ctxt->input->id;
5173 	state = ctxt->instate;
5174         ctxt->instate = XML_PARSER_PI;
5175 	/*
5176 	 * this is a Processing Instruction.
5177 	 */
5178 	SKIP(2);
5179 	SHRINK;
5180 
5181 	/*
5182 	 * Parse the target name and check for special support like
5183 	 * namespace.
5184 	 */
5185         target = xmlParsePITarget(ctxt);
5186 	if (target != NULL) {
5187 	    if ((RAW == '?') && (NXT(1) == '>')) {
5188 		if (inputid != ctxt->input->id) {
5189 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5190 	                           "PI declaration doesn't start and stop in"
5191                                    " the same entity\n");
5192 		}
5193 		SKIP(2);
5194 
5195 		/*
5196 		 * SAX: PI detected.
5197 		 */
5198 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5199 		    (ctxt->sax->processingInstruction != NULL))
5200 		    ctxt->sax->processingInstruction(ctxt->userData,
5201 		                                     target, NULL);
5202 		if (ctxt->instate != XML_PARSER_EOF)
5203 		    ctxt->instate = state;
5204 		return;
5205 	    }
5206 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5207 	    if (buf == NULL) {
5208 		xmlErrMemory(ctxt, NULL);
5209 		ctxt->instate = state;
5210 		return;
5211 	    }
5212 	    if (SKIP_BLANKS == 0) {
5213 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5214 			  "ParsePI: PI %s space expected\n", target);
5215 	    }
5216 	    cur = CUR_CHAR(l);
5217 	    while (IS_CHAR(cur) && /* checked */
5218 		   ((cur != '?') || (NXT(1) != '>'))) {
5219 		if (len + 5 >= size) {
5220 		    xmlChar *tmp;
5221                     size_t new_size = size * 2;
5222 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5223 		    if (tmp == NULL) {
5224 			xmlErrMemory(ctxt, NULL);
5225 			xmlFree(buf);
5226 			ctxt->instate = state;
5227 			return;
5228 		    }
5229 		    buf = tmp;
5230                     size = new_size;
5231 		}
5232 		count++;
5233 		if (count > 50) {
5234 		    SHRINK;
5235 		    GROW;
5236                     if (ctxt->instate == XML_PARSER_EOF) {
5237                         xmlFree(buf);
5238                         return;
5239                     }
5240 		    count = 0;
5241                     if ((len > XML_MAX_TEXT_LENGTH) &&
5242                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5243                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5244                                           "PI %s too big found", target);
5245                         xmlFree(buf);
5246                         ctxt->instate = state;
5247                         return;
5248                     }
5249 		}
5250 		COPY_BUF(l,buf,len,cur);
5251 		NEXTL(l);
5252 		cur = CUR_CHAR(l);
5253 		if (cur == 0) {
5254 		    SHRINK;
5255 		    GROW;
5256 		    cur = CUR_CHAR(l);
5257 		}
5258 	    }
5259             if ((len > XML_MAX_TEXT_LENGTH) &&
5260                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5261                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5262                                   "PI %s too big found", target);
5263                 xmlFree(buf);
5264                 ctxt->instate = state;
5265                 return;
5266             }
5267 	    buf[len] = 0;
5268 	    if (cur != '?') {
5269 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5270 		      "ParsePI: PI %s never end ...\n", target);
5271 	    } else {
5272 		if (inputid != ctxt->input->id) {
5273 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5274 	                           "PI declaration doesn't start and stop in"
5275                                    " the same entity\n");
5276 		}
5277 		SKIP(2);
5278 
5279 #ifdef LIBXML_CATALOG_ENABLED
5280 		if (((state == XML_PARSER_MISC) ||
5281 	             (state == XML_PARSER_START)) &&
5282 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5283 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5284 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5285 			(allow == XML_CATA_ALLOW_ALL))
5286 			xmlParseCatalogPI(ctxt, buf);
5287 		}
5288 #endif
5289 
5290 
5291 		/*
5292 		 * SAX: PI detected.
5293 		 */
5294 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295 		    (ctxt->sax->processingInstruction != NULL))
5296 		    ctxt->sax->processingInstruction(ctxt->userData,
5297 		                                     target, buf);
5298 	    }
5299 	    xmlFree(buf);
5300 	} else {
5301 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302 	}
5303 	if (ctxt->instate != XML_PARSER_EOF)
5304 	    ctxt->instate = state;
5305     }
5306 }
5307 
5308 /**
5309  * xmlParseNotationDecl:
5310  * @ctxt:  an XML parser context
5311  *
5312  * parse a notation declaration
5313  *
5314  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5315  *
5316  * Hence there is actually 3 choices:
5317  *     'PUBLIC' S PubidLiteral
5318  *     'PUBLIC' S PubidLiteral S SystemLiteral
5319  * and 'SYSTEM' S SystemLiteral
5320  *
5321  * See the NOTE on xmlParseExternalID().
5322  */
5323 
5324 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5325 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5326     const xmlChar *name;
5327     xmlChar *Pubid;
5328     xmlChar *Systemid;
5329 
5330     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5331 	int inputid = ctxt->input->id;
5332 	SHRINK;
5333 	SKIP(10);
5334 	if (SKIP_BLANKS == 0) {
5335 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5336 			   "Space required after '<!NOTATION'\n");
5337 	    return;
5338 	}
5339 
5340         name = xmlParseName(ctxt);
5341 	if (name == NULL) {
5342 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5343 	    return;
5344 	}
5345 	if (xmlStrchr(name, ':') != NULL) {
5346 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5347 		     "colons are forbidden from notation names '%s'\n",
5348 		     name, NULL, NULL);
5349 	}
5350 	if (SKIP_BLANKS == 0) {
5351 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 		     "Space required after the NOTATION name'\n");
5353 	    return;
5354 	}
5355 
5356 	/*
5357 	 * Parse the IDs.
5358 	 */
5359 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5360 	SKIP_BLANKS;
5361 
5362 	if (RAW == '>') {
5363 	    if (inputid != ctxt->input->id) {
5364 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365 	                       "Notation declaration doesn't start and stop"
5366                                " in the same entity\n");
5367 	    }
5368 	    NEXT;
5369 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5370 		(ctxt->sax->notationDecl != NULL))
5371 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5372 	} else {
5373 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5374 	}
5375 	if (Systemid != NULL) xmlFree(Systemid);
5376 	if (Pubid != NULL) xmlFree(Pubid);
5377     }
5378 }
5379 
5380 /**
5381  * xmlParseEntityDecl:
5382  * @ctxt:  an XML parser context
5383  *
5384  * parse <!ENTITY declarations
5385  *
5386  * [70] EntityDecl ::= GEDecl | PEDecl
5387  *
5388  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5389  *
5390  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5391  *
5392  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5393  *
5394  * [74] PEDef ::= EntityValue | ExternalID
5395  *
5396  * [76] NDataDecl ::= S 'NDATA' S Name
5397  *
5398  * [ VC: Notation Declared ]
5399  * The Name must match the declared name of a notation.
5400  */
5401 
5402 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5403 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5404     const xmlChar *name = NULL;
5405     xmlChar *value = NULL;
5406     xmlChar *URI = NULL, *literal = NULL;
5407     const xmlChar *ndata = NULL;
5408     int isParameter = 0;
5409     xmlChar *orig = NULL;
5410 
5411     /* GROW; done in the caller */
5412     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5413 	int inputid = ctxt->input->id;
5414 	SHRINK;
5415 	SKIP(8);
5416 	if (SKIP_BLANKS == 0) {
5417 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5418 			   "Space required after '<!ENTITY'\n");
5419 	}
5420 
5421 	if (RAW == '%') {
5422 	    NEXT;
5423 	    if (SKIP_BLANKS == 0) {
5424 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5425 			       "Space required after '%%'\n");
5426 	    }
5427 	    isParameter = 1;
5428 	}
5429 
5430         name = xmlParseName(ctxt);
5431 	if (name == NULL) {
5432 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5433 	                   "xmlParseEntityDecl: no name\n");
5434             return;
5435 	}
5436 	if (xmlStrchr(name, ':') != NULL) {
5437 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5438 		     "colons are forbidden from entities names '%s'\n",
5439 		     name, NULL, NULL);
5440 	}
5441 	if (SKIP_BLANKS == 0) {
5442 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5443 			   "Space required after the entity name\n");
5444 	}
5445 
5446 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5447 	/*
5448 	 * handle the various case of definitions...
5449 	 */
5450 	if (isParameter) {
5451 	    if ((RAW == '"') || (RAW == '\'')) {
5452 	        value = xmlParseEntityValue(ctxt, &orig);
5453 		if (value) {
5454 		    if ((ctxt->sax != NULL) &&
5455 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5456 			ctxt->sax->entityDecl(ctxt->userData, name,
5457 		                    XML_INTERNAL_PARAMETER_ENTITY,
5458 				    NULL, NULL, value);
5459 		}
5460 	    } else {
5461 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5462 		if ((URI == NULL) && (literal == NULL)) {
5463 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5464 		}
5465 		if (URI) {
5466 		    xmlURIPtr uri;
5467 
5468 		    uri = xmlParseURI((const char *) URI);
5469 		    if (uri == NULL) {
5470 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5471 				     "Invalid URI: %s\n", URI);
5472 			/*
5473 			 * This really ought to be a well formedness error
5474 			 * but the XML Core WG decided otherwise c.f. issue
5475 			 * E26 of the XML erratas.
5476 			 */
5477 		    } else {
5478 			if (uri->fragment != NULL) {
5479 			    /*
5480 			     * Okay this is foolish to block those but not
5481 			     * invalid URIs.
5482 			     */
5483 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484 			} else {
5485 			    if ((ctxt->sax != NULL) &&
5486 				(!ctxt->disableSAX) &&
5487 				(ctxt->sax->entityDecl != NULL))
5488 				ctxt->sax->entityDecl(ctxt->userData, name,
5489 					    XML_EXTERNAL_PARAMETER_ENTITY,
5490 					    literal, URI, NULL);
5491 			}
5492 			xmlFreeURI(uri);
5493 		    }
5494 		}
5495 	    }
5496 	} else {
5497 	    if ((RAW == '"') || (RAW == '\'')) {
5498 	        value = xmlParseEntityValue(ctxt, &orig);
5499 		if ((ctxt->sax != NULL) &&
5500 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5501 		    ctxt->sax->entityDecl(ctxt->userData, name,
5502 				XML_INTERNAL_GENERAL_ENTITY,
5503 				NULL, NULL, value);
5504 		/*
5505 		 * For expat compatibility in SAX mode.
5506 		 */
5507 		if ((ctxt->myDoc == NULL) ||
5508 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5509 		    if (ctxt->myDoc == NULL) {
5510 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 			if (ctxt->myDoc == NULL) {
5512 			    xmlErrMemory(ctxt, "New Doc failed");
5513 			    return;
5514 			}
5515 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 		    }
5517 		    if (ctxt->myDoc->intSubset == NULL)
5518 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5519 					    BAD_CAST "fake", NULL, NULL);
5520 
5521 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5522 			              NULL, NULL, value);
5523 		}
5524 	    } else {
5525 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5526 		if ((URI == NULL) && (literal == NULL)) {
5527 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5528 		}
5529 		if (URI) {
5530 		    xmlURIPtr uri;
5531 
5532 		    uri = xmlParseURI((const char *)URI);
5533 		    if (uri == NULL) {
5534 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5535 				     "Invalid URI: %s\n", URI);
5536 			/*
5537 			 * This really ought to be a well formedness error
5538 			 * but the XML Core WG decided otherwise c.f. issue
5539 			 * E26 of the XML erratas.
5540 			 */
5541 		    } else {
5542 			if (uri->fragment != NULL) {
5543 			    /*
5544 			     * Okay this is foolish to block those but not
5545 			     * invalid URIs.
5546 			     */
5547 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5548 			}
5549 			xmlFreeURI(uri);
5550 		    }
5551 		}
5552 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5553 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5554 				   "Space required before 'NDATA'\n");
5555 		}
5556 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5557 		    SKIP(5);
5558 		    if (SKIP_BLANKS == 0) {
5559 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 				       "Space required after 'NDATA'\n");
5561 		    }
5562 		    ndata = xmlParseName(ctxt);
5563 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5564 		        (ctxt->sax->unparsedEntityDecl != NULL))
5565 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5566 				    literal, URI, ndata);
5567 		} else {
5568 		    if ((ctxt->sax != NULL) &&
5569 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5570 			ctxt->sax->entityDecl(ctxt->userData, name,
5571 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5572 				    literal, URI, NULL);
5573 		    /*
5574 		     * For expat compatibility in SAX mode.
5575 		     * assuming the entity replacement was asked for
5576 		     */
5577 		    if ((ctxt->replaceEntities != 0) &&
5578 			((ctxt->myDoc == NULL) ||
5579 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5580 			if (ctxt->myDoc == NULL) {
5581 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5582 			    if (ctxt->myDoc == NULL) {
5583 			        xmlErrMemory(ctxt, "New Doc failed");
5584 				return;
5585 			    }
5586 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5587 			}
5588 
5589 			if (ctxt->myDoc->intSubset == NULL)
5590 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5591 						BAD_CAST "fake", NULL, NULL);
5592 			xmlSAX2EntityDecl(ctxt, name,
5593 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5594 				          literal, URI, NULL);
5595 		    }
5596 		}
5597 	    }
5598 	}
5599 	if (ctxt->instate == XML_PARSER_EOF)
5600 	    goto done;
5601 	SKIP_BLANKS;
5602 	if (RAW != '>') {
5603 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5604 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5605 	    xmlHaltParser(ctxt);
5606 	} else {
5607 	    if (inputid != ctxt->input->id) {
5608 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609 	                       "Entity declaration doesn't start and stop in"
5610                                " the same entity\n");
5611 	    }
5612 	    NEXT;
5613 	}
5614 	if (orig != NULL) {
5615 	    /*
5616 	     * Ugly mechanism to save the raw entity value.
5617 	     */
5618 	    xmlEntityPtr cur = NULL;
5619 
5620 	    if (isParameter) {
5621 	        if ((ctxt->sax != NULL) &&
5622 		    (ctxt->sax->getParameterEntity != NULL))
5623 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5624 	    } else {
5625 	        if ((ctxt->sax != NULL) &&
5626 		    (ctxt->sax->getEntity != NULL))
5627 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5628 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5629 		    cur = xmlSAX2GetEntity(ctxt, name);
5630 		}
5631 	    }
5632             if ((cur != NULL) && (cur->orig == NULL)) {
5633 		cur->orig = orig;
5634                 orig = NULL;
5635 	    }
5636 	}
5637 
5638 done:
5639 	if (value != NULL) xmlFree(value);
5640 	if (URI != NULL) xmlFree(URI);
5641 	if (literal != NULL) xmlFree(literal);
5642         if (orig != NULL) xmlFree(orig);
5643     }
5644 }
5645 
5646 /**
5647  * xmlParseDefaultDecl:
5648  * @ctxt:  an XML parser context
5649  * @value:  Receive a possible fixed default value for the attribute
5650  *
5651  * Parse an attribute default declaration
5652  *
5653  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5654  *
5655  * [ VC: Required Attribute ]
5656  * if the default declaration is the keyword #REQUIRED, then the
5657  * attribute must be specified for all elements of the type in the
5658  * attribute-list declaration.
5659  *
5660  * [ VC: Attribute Default Legal ]
5661  * The declared default value must meet the lexical constraints of
5662  * the declared attribute type c.f. xmlValidateAttributeDecl()
5663  *
5664  * [ VC: Fixed Attribute Default ]
5665  * if an attribute has a default value declared with the #FIXED
5666  * keyword, instances of that attribute must match the default value.
5667  *
5668  * [ WFC: No < in Attribute Values ]
5669  * handled in xmlParseAttValue()
5670  *
5671  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5672  *          or XML_ATTRIBUTE_FIXED.
5673  */
5674 
5675 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5676 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5677     int val;
5678     xmlChar *ret;
5679 
5680     *value = NULL;
5681     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5682 	SKIP(9);
5683 	return(XML_ATTRIBUTE_REQUIRED);
5684     }
5685     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5686 	SKIP(8);
5687 	return(XML_ATTRIBUTE_IMPLIED);
5688     }
5689     val = XML_ATTRIBUTE_NONE;
5690     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5691 	SKIP(6);
5692 	val = XML_ATTRIBUTE_FIXED;
5693 	if (SKIP_BLANKS == 0) {
5694 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5695 			   "Space required after '#FIXED'\n");
5696 	}
5697     }
5698     ret = xmlParseAttValue(ctxt);
5699     ctxt->instate = XML_PARSER_DTD;
5700     if (ret == NULL) {
5701 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5702 		       "Attribute default value declaration error\n");
5703     } else
5704         *value = ret;
5705     return(val);
5706 }
5707 
5708 /**
5709  * xmlParseNotationType:
5710  * @ctxt:  an XML parser context
5711  *
5712  * parse an Notation attribute type.
5713  *
5714  * Note: the leading 'NOTATION' S part has already being parsed...
5715  *
5716  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5717  *
5718  * [ VC: Notation Attributes ]
5719  * Values of this type must match one of the notation names included
5720  * in the declaration; all notation names in the declaration must be declared.
5721  *
5722  * Returns: the notation attribute tree built while parsing
5723  */
5724 
5725 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5726 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5727     const xmlChar *name;
5728     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5729 
5730     if (RAW != '(') {
5731 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5732 	return(NULL);
5733     }
5734     SHRINK;
5735     do {
5736         NEXT;
5737 	SKIP_BLANKS;
5738         name = xmlParseName(ctxt);
5739 	if (name == NULL) {
5740 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741 			   "Name expected in NOTATION declaration\n");
5742             xmlFreeEnumeration(ret);
5743 	    return(NULL);
5744 	}
5745 	tmp = ret;
5746 	while (tmp != NULL) {
5747 	    if (xmlStrEqual(name, tmp->name)) {
5748 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749 	  "standalone: attribute notation value token %s duplicated\n",
5750 				 name, NULL);
5751 		if (!xmlDictOwns(ctxt->dict, name))
5752 		    xmlFree((xmlChar *) name);
5753 		break;
5754 	    }
5755 	    tmp = tmp->next;
5756 	}
5757 	if (tmp == NULL) {
5758 	    cur = xmlCreateEnumeration(name);
5759 	    if (cur == NULL) {
5760                 xmlFreeEnumeration(ret);
5761                 return(NULL);
5762             }
5763 	    if (last == NULL) ret = last = cur;
5764 	    else {
5765 		last->next = cur;
5766 		last = cur;
5767 	    }
5768 	}
5769 	SKIP_BLANKS;
5770     } while (RAW == '|');
5771     if (RAW != ')') {
5772 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5773         xmlFreeEnumeration(ret);
5774 	return(NULL);
5775     }
5776     NEXT;
5777     return(ret);
5778 }
5779 
5780 /**
5781  * xmlParseEnumerationType:
5782  * @ctxt:  an XML parser context
5783  *
5784  * parse an Enumeration attribute type.
5785  *
5786  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5787  *
5788  * [ VC: Enumeration ]
5789  * Values of this type must match one of the Nmtoken tokens in
5790  * the declaration
5791  *
5792  * Returns: the enumeration attribute tree built while parsing
5793  */
5794 
5795 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5796 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5797     xmlChar *name;
5798     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5799 
5800     if (RAW != '(') {
5801 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5802 	return(NULL);
5803     }
5804     SHRINK;
5805     do {
5806         NEXT;
5807 	SKIP_BLANKS;
5808         name = xmlParseNmtoken(ctxt);
5809 	if (name == NULL) {
5810 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5811 	    return(ret);
5812 	}
5813 	tmp = ret;
5814 	while (tmp != NULL) {
5815 	    if (xmlStrEqual(name, tmp->name)) {
5816 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817 	  "standalone: attribute enumeration value token %s duplicated\n",
5818 				 name, NULL);
5819 		if (!xmlDictOwns(ctxt->dict, name))
5820 		    xmlFree(name);
5821 		break;
5822 	    }
5823 	    tmp = tmp->next;
5824 	}
5825 	if (tmp == NULL) {
5826 	    cur = xmlCreateEnumeration(name);
5827 	    if (!xmlDictOwns(ctxt->dict, name))
5828 		xmlFree(name);
5829 	    if (cur == NULL) {
5830                 xmlFreeEnumeration(ret);
5831                 return(NULL);
5832             }
5833 	    if (last == NULL) ret = last = cur;
5834 	    else {
5835 		last->next = cur;
5836 		last = cur;
5837 	    }
5838 	}
5839 	SKIP_BLANKS;
5840     } while (RAW == '|');
5841     if (RAW != ')') {
5842 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5843 	return(ret);
5844     }
5845     NEXT;
5846     return(ret);
5847 }
5848 
5849 /**
5850  * xmlParseEnumeratedType:
5851  * @ctxt:  an XML parser context
5852  * @tree:  the enumeration tree built while parsing
5853  *
5854  * parse an Enumerated attribute type.
5855  *
5856  * [57] EnumeratedType ::= NotationType | Enumeration
5857  *
5858  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5859  *
5860  *
5861  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5862  */
5863 
5864 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5865 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5866     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5867 	SKIP(8);
5868 	if (SKIP_BLANKS == 0) {
5869 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870 			   "Space required after 'NOTATION'\n");
5871 	    return(0);
5872 	}
5873 	*tree = xmlParseNotationType(ctxt);
5874 	if (*tree == NULL) return(0);
5875 	return(XML_ATTRIBUTE_NOTATION);
5876     }
5877     *tree = xmlParseEnumerationType(ctxt);
5878     if (*tree == NULL) return(0);
5879     return(XML_ATTRIBUTE_ENUMERATION);
5880 }
5881 
5882 /**
5883  * xmlParseAttributeType:
5884  * @ctxt:  an XML parser context
5885  * @tree:  the enumeration tree built while parsing
5886  *
5887  * parse the Attribute list def for an element
5888  *
5889  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5890  *
5891  * [55] StringType ::= 'CDATA'
5892  *
5893  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5894  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5895  *
5896  * Validity constraints for attribute values syntax are checked in
5897  * xmlValidateAttributeValue()
5898  *
5899  * [ VC: ID ]
5900  * Values of type ID must match the Name production. A name must not
5901  * appear more than once in an XML document as a value of this type;
5902  * i.e., ID values must uniquely identify the elements which bear them.
5903  *
5904  * [ VC: One ID per Element Type ]
5905  * No element type may have more than one ID attribute specified.
5906  *
5907  * [ VC: ID Attribute Default ]
5908  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5909  *
5910  * [ VC: IDREF ]
5911  * Values of type IDREF must match the Name production, and values
5912  * of type IDREFS must match Names; each IDREF Name must match the value
5913  * of an ID attribute on some element in the XML document; i.e. IDREF
5914  * values must match the value of some ID attribute.
5915  *
5916  * [ VC: Entity Name ]
5917  * Values of type ENTITY must match the Name production, values
5918  * of type ENTITIES must match Names; each Entity Name must match the
5919  * name of an unparsed entity declared in the DTD.
5920  *
5921  * [ VC: Name Token ]
5922  * Values of type NMTOKEN must match the Nmtoken production; values
5923  * of type NMTOKENS must match Nmtokens.
5924  *
5925  * Returns the attribute type
5926  */
5927 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5928 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5929     SHRINK;
5930     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5931 	SKIP(5);
5932 	return(XML_ATTRIBUTE_CDATA);
5933      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5934 	SKIP(6);
5935 	return(XML_ATTRIBUTE_IDREFS);
5936      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5937 	SKIP(5);
5938 	return(XML_ATTRIBUTE_IDREF);
5939      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5940         SKIP(2);
5941 	return(XML_ATTRIBUTE_ID);
5942      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5943 	SKIP(6);
5944 	return(XML_ATTRIBUTE_ENTITY);
5945      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5946 	SKIP(8);
5947 	return(XML_ATTRIBUTE_ENTITIES);
5948      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5949 	SKIP(8);
5950 	return(XML_ATTRIBUTE_NMTOKENS);
5951      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5952 	SKIP(7);
5953 	return(XML_ATTRIBUTE_NMTOKEN);
5954      }
5955      return(xmlParseEnumeratedType(ctxt, tree));
5956 }
5957 
5958 /**
5959  * xmlParseAttributeListDecl:
5960  * @ctxt:  an XML parser context
5961  *
5962  * : parse the Attribute list def for an element
5963  *
5964  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5965  *
5966  * [53] AttDef ::= S Name S AttType S DefaultDecl
5967  *
5968  */
5969 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5970 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5971     const xmlChar *elemName;
5972     const xmlChar *attrName;
5973     xmlEnumerationPtr tree;
5974 
5975     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976 	int inputid = ctxt->input->id;
5977 
5978 	SKIP(9);
5979 	if (SKIP_BLANKS == 0) {
5980 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5981 		                 "Space required after '<!ATTLIST'\n");
5982 	}
5983         elemName = xmlParseName(ctxt);
5984 	if (elemName == NULL) {
5985 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5986 			   "ATTLIST: no name for Element\n");
5987 	    return;
5988 	}
5989 	SKIP_BLANKS;
5990 	GROW;
5991 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5992 	    int type;
5993 	    int def;
5994 	    xmlChar *defaultValue = NULL;
5995 
5996 	    GROW;
5997             tree = NULL;
5998 	    attrName = xmlParseName(ctxt);
5999 	    if (attrName == NULL) {
6000 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6001 			       "ATTLIST: no name for Attribute\n");
6002 		break;
6003 	    }
6004 	    GROW;
6005 	    if (SKIP_BLANKS == 0) {
6006 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6007 		        "Space required after the attribute name\n");
6008 		break;
6009 	    }
6010 
6011 	    type = xmlParseAttributeType(ctxt, &tree);
6012 	    if (type <= 0) {
6013 	        break;
6014 	    }
6015 
6016 	    GROW;
6017 	    if (SKIP_BLANKS == 0) {
6018 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6019 			       "Space required after the attribute type\n");
6020 	        if (tree != NULL)
6021 		    xmlFreeEnumeration(tree);
6022 		break;
6023 	    }
6024 
6025 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6026 	    if (def <= 0) {
6027                 if (defaultValue != NULL)
6028 		    xmlFree(defaultValue);
6029 	        if (tree != NULL)
6030 		    xmlFreeEnumeration(tree);
6031 	        break;
6032 	    }
6033 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6034 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6035 
6036 	    GROW;
6037             if (RAW != '>') {
6038 		if (SKIP_BLANKS == 0) {
6039 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6040 			"Space required after the attribute default value\n");
6041 		    if (defaultValue != NULL)
6042 			xmlFree(defaultValue);
6043 		    if (tree != NULL)
6044 			xmlFreeEnumeration(tree);
6045 		    break;
6046 		}
6047 	    }
6048 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6049 		(ctxt->sax->attributeDecl != NULL))
6050 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6051 	                        type, def, defaultValue, tree);
6052 	    else if (tree != NULL)
6053 		xmlFreeEnumeration(tree);
6054 
6055 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6056 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6057 		(def != XML_ATTRIBUTE_REQUIRED)) {
6058 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6059 	    }
6060 	    if (ctxt->sax2) {
6061 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6062 	    }
6063 	    if (defaultValue != NULL)
6064 	        xmlFree(defaultValue);
6065 	    GROW;
6066 	}
6067 	if (RAW == '>') {
6068 	    if (inputid != ctxt->input->id) {
6069 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6070                                "Attribute list declaration doesn't start and"
6071                                " stop in the same entity\n");
6072 	    }
6073 	    NEXT;
6074 	}
6075     }
6076 }
6077 
6078 /**
6079  * xmlParseElementMixedContentDecl:
6080  * @ctxt:  an XML parser context
6081  * @inputchk:  the input used for the current entity, needed for boundary checks
6082  *
6083  * parse the declaration for a Mixed Element content
6084  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6085  *
6086  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6087  *                '(' S? '#PCDATA' S? ')'
6088  *
6089  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6090  *
6091  * [ VC: No Duplicate Types ]
6092  * The same name must not appear more than once in a single
6093  * mixed-content declaration.
6094  *
6095  * returns: the list of the xmlElementContentPtr describing the element choices
6096  */
6097 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6098 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6099     xmlElementContentPtr ret = NULL, cur = NULL, n;
6100     const xmlChar *elem = NULL;
6101 
6102     GROW;
6103     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6104 	SKIP(7);
6105 	SKIP_BLANKS;
6106 	SHRINK;
6107 	if (RAW == ')') {
6108 	    if (ctxt->input->id != inputchk) {
6109 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6110                                "Element content declaration doesn't start and"
6111                                " stop in the same entity\n");
6112 	    }
6113 	    NEXT;
6114 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6115 	    if (ret == NULL)
6116 	        return(NULL);
6117 	    if (RAW == '*') {
6118 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6119 		NEXT;
6120 	    }
6121 	    return(ret);
6122 	}
6123 	if ((RAW == '(') || (RAW == '|')) {
6124 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6125 	    if (ret == NULL) return(NULL);
6126 	}
6127 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6128 	    NEXT;
6129 	    if (elem == NULL) {
6130 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6131 		if (ret == NULL) {
6132 		    xmlFreeDocElementContent(ctxt->myDoc, cur);
6133                     return(NULL);
6134                 }
6135 		ret->c1 = cur;
6136 		if (cur != NULL)
6137 		    cur->parent = ret;
6138 		cur = ret;
6139 	    } else {
6140 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6141 		if (n == NULL) {
6142 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6143                     return(NULL);
6144                 }
6145 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6146 		if (n->c1 != NULL)
6147 		    n->c1->parent = n;
6148 	        cur->c2 = n;
6149 		if (n != NULL)
6150 		    n->parent = cur;
6151 		cur = n;
6152 	    }
6153 	    SKIP_BLANKS;
6154 	    elem = xmlParseName(ctxt);
6155 	    if (elem == NULL) {
6156 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6157 			"xmlParseElementMixedContentDecl : Name expected\n");
6158 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6159 		return(NULL);
6160 	    }
6161 	    SKIP_BLANKS;
6162 	    GROW;
6163 	}
6164 	if ((RAW == ')') && (NXT(1) == '*')) {
6165 	    if (elem != NULL) {
6166 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6167 		                               XML_ELEMENT_CONTENT_ELEMENT);
6168 		if (cur->c2 != NULL)
6169 		    cur->c2->parent = cur;
6170             }
6171             if (ret != NULL)
6172                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6173 	    if (ctxt->input->id != inputchk) {
6174 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6175                                "Element content declaration doesn't start and"
6176                                " stop in the same entity\n");
6177 	    }
6178 	    SKIP(2);
6179 	} else {
6180 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6181 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6182 	    return(NULL);
6183 	}
6184 
6185     } else {
6186 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6187     }
6188     return(ret);
6189 }
6190 
6191 /**
6192  * xmlParseElementChildrenContentDeclPriv:
6193  * @ctxt:  an XML parser context
6194  * @inputchk:  the input used for the current entity, needed for boundary checks
6195  * @depth: the level of recursion
6196  *
6197  * parse the declaration for a Mixed Element content
6198  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6199  *
6200  *
6201  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6202  *
6203  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6204  *
6205  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6206  *
6207  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6208  *
6209  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6210  * TODO Parameter-entity replacement text must be properly nested
6211  *	with parenthesized groups. That is to say, if either of the
6212  *	opening or closing parentheses in a choice, seq, or Mixed
6213  *	construct is contained in the replacement text for a parameter
6214  *	entity, both must be contained in the same replacement text. For
6215  *	interoperability, if a parameter-entity reference appears in a
6216  *	choice, seq, or Mixed construct, its replacement text should not
6217  *	be empty, and neither the first nor last non-blank character of
6218  *	the replacement text should be a connector (| or ,).
6219  *
6220  * Returns the tree of xmlElementContentPtr describing the element
6221  *          hierarchy.
6222  */
6223 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6224 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6225                                        int depth) {
6226     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6227     const xmlChar *elem;
6228     xmlChar type = 0;
6229 
6230     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6231         (depth >  2048)) {
6232         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6233 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6234                           depth);
6235 	return(NULL);
6236     }
6237     SKIP_BLANKS;
6238     GROW;
6239     if (RAW == '(') {
6240 	int inputid = ctxt->input->id;
6241 
6242         /* Recurse on first child */
6243 	NEXT;
6244 	SKIP_BLANKS;
6245         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6246                                                            depth + 1);
6247         if (cur == NULL)
6248             return(NULL);
6249 	SKIP_BLANKS;
6250 	GROW;
6251     } else {
6252 	elem = xmlParseName(ctxt);
6253 	if (elem == NULL) {
6254 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6255 	    return(NULL);
6256 	}
6257         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6258 	if (cur == NULL) {
6259 	    xmlErrMemory(ctxt, NULL);
6260 	    return(NULL);
6261 	}
6262 	GROW;
6263 	if (RAW == '?') {
6264 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6265 	    NEXT;
6266 	} else if (RAW == '*') {
6267 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6268 	    NEXT;
6269 	} else if (RAW == '+') {
6270 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6271 	    NEXT;
6272 	} else {
6273 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6274 	}
6275 	GROW;
6276     }
6277     SKIP_BLANKS;
6278     SHRINK;
6279     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6280         /*
6281 	 * Each loop we parse one separator and one element.
6282 	 */
6283         if (RAW == ',') {
6284 	    if (type == 0) type = CUR;
6285 
6286 	    /*
6287 	     * Detect "Name | Name , Name" error
6288 	     */
6289 	    else if (type != CUR) {
6290 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6291 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6292 		                  type);
6293 		if ((last != NULL) && (last != ret))
6294 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6295 		if (ret != NULL)
6296 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6297 		return(NULL);
6298 	    }
6299 	    NEXT;
6300 
6301 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6302 	    if (op == NULL) {
6303 		if ((last != NULL) && (last != ret))
6304 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6305 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6306 		return(NULL);
6307 	    }
6308 	    if (last == NULL) {
6309 		op->c1 = ret;
6310 		if (ret != NULL)
6311 		    ret->parent = op;
6312 		ret = cur = op;
6313 	    } else {
6314 	        cur->c2 = op;
6315 		if (op != NULL)
6316 		    op->parent = cur;
6317 		op->c1 = last;
6318 		if (last != NULL)
6319 		    last->parent = op;
6320 		cur =op;
6321 		last = NULL;
6322 	    }
6323 	} else if (RAW == '|') {
6324 	    if (type == 0) type = CUR;
6325 
6326 	    /*
6327 	     * Detect "Name , Name | Name" error
6328 	     */
6329 	    else if (type != CUR) {
6330 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6331 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6332 				  type);
6333 		if ((last != NULL) && (last != ret))
6334 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6335 		if (ret != NULL)
6336 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6337 		return(NULL);
6338 	    }
6339 	    NEXT;
6340 
6341 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6342 	    if (op == NULL) {
6343 		if ((last != NULL) && (last != ret))
6344 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6345 		if (ret != NULL)
6346 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6347 		return(NULL);
6348 	    }
6349 	    if (last == NULL) {
6350 		op->c1 = ret;
6351 		if (ret != NULL)
6352 		    ret->parent = op;
6353 		ret = cur = op;
6354 	    } else {
6355 	        cur->c2 = op;
6356 		if (op != NULL)
6357 		    op->parent = cur;
6358 		op->c1 = last;
6359 		if (last != NULL)
6360 		    last->parent = op;
6361 		cur =op;
6362 		last = NULL;
6363 	    }
6364 	} else {
6365 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6366 	    if ((last != NULL) && (last != ret))
6367 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6368 	    if (ret != NULL)
6369 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6370 	    return(NULL);
6371 	}
6372 	GROW;
6373 	SKIP_BLANKS;
6374 	GROW;
6375 	if (RAW == '(') {
6376 	    int inputid = ctxt->input->id;
6377 	    /* Recurse on second child */
6378 	    NEXT;
6379 	    SKIP_BLANKS;
6380 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6381                                                           depth + 1);
6382             if (last == NULL) {
6383 		if (ret != NULL)
6384 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6385 		return(NULL);
6386             }
6387 	    SKIP_BLANKS;
6388 	} else {
6389 	    elem = xmlParseName(ctxt);
6390 	    if (elem == NULL) {
6391 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6392 		if (ret != NULL)
6393 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6394 		return(NULL);
6395 	    }
6396 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6397 	    if (last == NULL) {
6398 		if (ret != NULL)
6399 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 		return(NULL);
6401 	    }
6402 	    if (RAW == '?') {
6403 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6404 		NEXT;
6405 	    } else if (RAW == '*') {
6406 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6407 		NEXT;
6408 	    } else if (RAW == '+') {
6409 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6410 		NEXT;
6411 	    } else {
6412 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6413 	    }
6414 	}
6415 	SKIP_BLANKS;
6416 	GROW;
6417     }
6418     if ((cur != NULL) && (last != NULL)) {
6419         cur->c2 = last;
6420 	if (last != NULL)
6421 	    last->parent = cur;
6422     }
6423     if (ctxt->input->id != inputchk) {
6424 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6425                        "Element content declaration doesn't start and stop in"
6426                        " the same entity\n");
6427     }
6428     NEXT;
6429     if (RAW == '?') {
6430 	if (ret != NULL) {
6431 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6432 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6433 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6434 	    else
6435 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6436 	}
6437 	NEXT;
6438     } else if (RAW == '*') {
6439 	if (ret != NULL) {
6440 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6441 	    cur = ret;
6442 	    /*
6443 	     * Some normalization:
6444 	     * (a | b* | c?)* == (a | b | c)*
6445 	     */
6446 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6447 		if ((cur->c1 != NULL) &&
6448 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6449 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6450 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6451 		if ((cur->c2 != NULL) &&
6452 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6453 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6454 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6455 		cur = cur->c2;
6456 	    }
6457 	}
6458 	NEXT;
6459     } else if (RAW == '+') {
6460 	if (ret != NULL) {
6461 	    int found = 0;
6462 
6463 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6464 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6465 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6466 	    else
6467 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6468 	    /*
6469 	     * Some normalization:
6470 	     * (a | b*)+ == (a | b)*
6471 	     * (a | b?)+ == (a | b)*
6472 	     */
6473 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6474 		if ((cur->c1 != NULL) &&
6475 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6477 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6478 		    found = 1;
6479 		}
6480 		if ((cur->c2 != NULL) &&
6481 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 		    found = 1;
6485 		}
6486 		cur = cur->c2;
6487 	    }
6488 	    if (found)
6489 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490 	}
6491 	NEXT;
6492     }
6493     return(ret);
6494 }
6495 
6496 /**
6497  * xmlParseElementChildrenContentDecl:
6498  * @ctxt:  an XML parser context
6499  * @inputchk:  the input used for the current entity, needed for boundary checks
6500  *
6501  * parse the declaration for a Mixed Element content
6502  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6503  *
6504  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6505  *
6506  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6507  *
6508  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6509  *
6510  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6511  *
6512  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6513  * TODO Parameter-entity replacement text must be properly nested
6514  *	with parenthesized groups. That is to say, if either of the
6515  *	opening or closing parentheses in a choice, seq, or Mixed
6516  *	construct is contained in the replacement text for a parameter
6517  *	entity, both must be contained in the same replacement text. For
6518  *	interoperability, if a parameter-entity reference appears in a
6519  *	choice, seq, or Mixed construct, its replacement text should not
6520  *	be empty, and neither the first nor last non-blank character of
6521  *	the replacement text should be a connector (| or ,).
6522  *
6523  * Returns the tree of xmlElementContentPtr describing the element
6524  *          hierarchy.
6525  */
6526 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6527 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6528     /* stub left for API/ABI compat */
6529     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6530 }
6531 
6532 /**
6533  * xmlParseElementContentDecl:
6534  * @ctxt:  an XML parser context
6535  * @name:  the name of the element being defined.
6536  * @result:  the Element Content pointer will be stored here if any
6537  *
6538  * parse the declaration for an Element content either Mixed or Children,
6539  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6540  *
6541  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6542  *
6543  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6544  */
6545 
6546 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6547 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6548                            xmlElementContentPtr *result) {
6549 
6550     xmlElementContentPtr tree = NULL;
6551     int inputid = ctxt->input->id;
6552     int res;
6553 
6554     *result = NULL;
6555 
6556     if (RAW != '(') {
6557 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6559 	return(-1);
6560     }
6561     NEXT;
6562     GROW;
6563     if (ctxt->instate == XML_PARSER_EOF)
6564         return(-1);
6565     SKIP_BLANKS;
6566     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6567         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6568 	res = XML_ELEMENT_TYPE_MIXED;
6569     } else {
6570         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6571 	res = XML_ELEMENT_TYPE_ELEMENT;
6572     }
6573     SKIP_BLANKS;
6574     *result = tree;
6575     return(res);
6576 }
6577 
6578 /**
6579  * xmlParseElementDecl:
6580  * @ctxt:  an XML parser context
6581  *
6582  * parse an Element declaration.
6583  *
6584  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6585  *
6586  * [ VC: Unique Element Type Declaration ]
6587  * No element type may be declared more than once
6588  *
6589  * Returns the type of the element, or -1 in case of error
6590  */
6591 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6592 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6593     const xmlChar *name;
6594     int ret = -1;
6595     xmlElementContentPtr content  = NULL;
6596 
6597     /* GROW; done in the caller */
6598     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599 	int inputid = ctxt->input->id;
6600 
6601 	SKIP(9);
6602 	if (SKIP_BLANKS == 0) {
6603 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604 		           "Space required after 'ELEMENT'\n");
6605 	    return(-1);
6606 	}
6607         name = xmlParseName(ctxt);
6608 	if (name == NULL) {
6609 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610 			   "xmlParseElementDecl: no name for Element\n");
6611 	    return(-1);
6612 	}
6613 	if (SKIP_BLANKS == 0) {
6614 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615 			   "Space required after the element name\n");
6616 	}
6617 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6618 	    SKIP(5);
6619 	    /*
6620 	     * Element must always be empty.
6621 	     */
6622 	    ret = XML_ELEMENT_TYPE_EMPTY;
6623 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624 	           (NXT(2) == 'Y')) {
6625 	    SKIP(3);
6626 	    /*
6627 	     * Element is a generic container.
6628 	     */
6629 	    ret = XML_ELEMENT_TYPE_ANY;
6630 	} else if (RAW == '(') {
6631 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6632 	} else {
6633 	    /*
6634 	     * [ WFC: PEs in Internal Subset ] error handling.
6635 	     */
6636 	    if ((RAW == '%') && (ctxt->external == 0) &&
6637 	        (ctxt->inputNr == 1)) {
6638 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6639 	  "PEReference: forbidden within markup decl in internal subset\n");
6640 	    } else {
6641 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643             }
6644 	    return(-1);
6645 	}
6646 
6647 	SKIP_BLANKS;
6648 
6649 	if (RAW != '>') {
6650 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6651 	    if (content != NULL) {
6652 		xmlFreeDocElementContent(ctxt->myDoc, content);
6653 	    }
6654 	} else {
6655 	    if (inputid != ctxt->input->id) {
6656 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657                                "Element declaration doesn't start and stop in"
6658                                " the same entity\n");
6659 	    }
6660 
6661 	    NEXT;
6662 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6663 		(ctxt->sax->elementDecl != NULL)) {
6664 		if (content != NULL)
6665 		    content->parent = NULL;
6666 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667 		                       content);
6668 		if ((content != NULL) && (content->parent == NULL)) {
6669 		    /*
6670 		     * this is a trick: if xmlAddElementDecl is called,
6671 		     * instead of copying the full tree it is plugged directly
6672 		     * if called from the parser. Avoid duplicating the
6673 		     * interfaces or change the API/ABI
6674 		     */
6675 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6676 		}
6677 	    } else if (content != NULL) {
6678 		xmlFreeDocElementContent(ctxt->myDoc, content);
6679 	    }
6680 	}
6681     }
6682     return(ret);
6683 }
6684 
6685 /**
6686  * xmlParseConditionalSections
6687  * @ctxt:  an XML parser context
6688  *
6689  * [61] conditionalSect ::= includeSect | ignoreSect
6690  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6691  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6692  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6693  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6694  */
6695 
6696 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6697 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6698     int *inputIds = NULL;
6699     size_t inputIdsSize = 0;
6700     size_t depth = 0;
6701 
6702     while (ctxt->instate != XML_PARSER_EOF) {
6703         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6704             int id = ctxt->input->id;
6705 
6706             SKIP(3);
6707             SKIP_BLANKS;
6708 
6709             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6710                 SKIP(7);
6711                 SKIP_BLANKS;
6712                 if (RAW != '[') {
6713                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6714                     xmlHaltParser(ctxt);
6715                     goto error;
6716                 }
6717                 if (ctxt->input->id != id) {
6718                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6719                                    "All markup of the conditional section is"
6720                                    " not in the same entity\n");
6721                 }
6722                 NEXT;
6723 
6724                 if (inputIdsSize <= depth) {
6725                     int *tmp;
6726 
6727                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6728                     tmp = (int *) xmlRealloc(inputIds,
6729                             inputIdsSize * sizeof(int));
6730                     if (tmp == NULL) {
6731                         xmlErrMemory(ctxt, NULL);
6732                         goto error;
6733                     }
6734                     inputIds = tmp;
6735                 }
6736                 inputIds[depth] = id;
6737                 depth++;
6738             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6739                 int state;
6740                 xmlParserInputState instate;
6741                 size_t ignoreDepth = 0;
6742 
6743                 SKIP(6);
6744                 SKIP_BLANKS;
6745                 if (RAW != '[') {
6746                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747                     xmlHaltParser(ctxt);
6748                     goto error;
6749                 }
6750                 if (ctxt->input->id != id) {
6751                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752                                    "All markup of the conditional section is"
6753                                    " not in the same entity\n");
6754                 }
6755                 NEXT;
6756 
6757                 /*
6758                  * Parse up to the end of the conditional section but disable
6759                  * SAX event generating DTD building in the meantime
6760                  */
6761                 state = ctxt->disableSAX;
6762                 instate = ctxt->instate;
6763                 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6764                 ctxt->instate = XML_PARSER_IGNORE;
6765 
6766                 while (RAW != 0) {
6767                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6768                         SKIP(3);
6769                         ignoreDepth++;
6770                         /* Check for integer overflow */
6771                         if (ignoreDepth == 0) {
6772                             xmlErrMemory(ctxt, NULL);
6773                             goto error;
6774                         }
6775                     } else if ((RAW == ']') && (NXT(1) == ']') &&
6776                                (NXT(2) == '>')) {
6777                         if (ignoreDepth == 0)
6778                             break;
6779                         SKIP(3);
6780                         ignoreDepth--;
6781                     } else {
6782                         NEXT;
6783                     }
6784                 }
6785 
6786                 ctxt->disableSAX = state;
6787                 ctxt->instate = instate;
6788 
6789 		if (RAW == 0) {
6790 		    xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6791                     goto error;
6792 		}
6793                 if (ctxt->input->id != id) {
6794                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6795                                    "All markup of the conditional section is"
6796                                    " not in the same entity\n");
6797                 }
6798                 SKIP(3);
6799             } else {
6800                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6801                 xmlHaltParser(ctxt);
6802                 goto error;
6803             }
6804         } else if ((depth > 0) &&
6805                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6806             depth--;
6807             if (ctxt->input->id != inputIds[depth]) {
6808                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6809                                "All markup of the conditional section is not"
6810                                " in the same entity\n");
6811             }
6812             SKIP(3);
6813         } else {
6814             const xmlChar *check = CUR_PTR;
6815             unsigned int cons = ctxt->input->consumed;
6816 
6817             xmlParseMarkupDecl(ctxt);
6818 
6819             if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6820                 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6821                 xmlHaltParser(ctxt);
6822                 goto error;
6823             }
6824         }
6825 
6826         if (depth == 0)
6827             break;
6828 
6829         SKIP_BLANKS;
6830         GROW;
6831     }
6832 
6833 error:
6834     xmlFree(inputIds);
6835 }
6836 
6837 /**
6838  * xmlParseMarkupDecl:
6839  * @ctxt:  an XML parser context
6840  *
6841  * parse Markup declarations
6842  *
6843  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6844  *                     NotationDecl | PI | Comment
6845  *
6846  * [ VC: Proper Declaration/PE Nesting ]
6847  * Parameter-entity replacement text must be properly nested with
6848  * markup declarations. That is to say, if either the first character
6849  * or the last character of a markup declaration (markupdecl above) is
6850  * contained in the replacement text for a parameter-entity reference,
6851  * both must be contained in the same replacement text.
6852  *
6853  * [ WFC: PEs in Internal Subset ]
6854  * In the internal DTD subset, parameter-entity references can occur
6855  * only where markup declarations can occur, not within markup declarations.
6856  * (This does not apply to references that occur in external parameter
6857  * entities or to the external subset.)
6858  */
6859 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6860 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6861     GROW;
6862     if (CUR == '<') {
6863         if (NXT(1) == '!') {
6864 	    switch (NXT(2)) {
6865 	        case 'E':
6866 		    if (NXT(3) == 'L')
6867 			xmlParseElementDecl(ctxt);
6868 		    else if (NXT(3) == 'N')
6869 			xmlParseEntityDecl(ctxt);
6870 		    break;
6871 	        case 'A':
6872 		    xmlParseAttributeListDecl(ctxt);
6873 		    break;
6874 	        case 'N':
6875 		    xmlParseNotationDecl(ctxt);
6876 		    break;
6877 	        case '-':
6878 		    xmlParseComment(ctxt);
6879 		    break;
6880 		default:
6881 		    /* there is an error but it will be detected later */
6882 		    break;
6883 	    }
6884 	} else if (NXT(1) == '?') {
6885 	    xmlParsePI(ctxt);
6886 	}
6887     }
6888 
6889     /*
6890      * detect requirement to exit there and act accordingly
6891      * and avoid having instate overridden later on
6892      */
6893     if (ctxt->instate == XML_PARSER_EOF)
6894         return;
6895 
6896     ctxt->instate = XML_PARSER_DTD;
6897 }
6898 
6899 /**
6900  * xmlParseTextDecl:
6901  * @ctxt:  an XML parser context
6902  *
6903  * parse an XML declaration header for external entities
6904  *
6905  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6906  */
6907 
6908 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6909 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6910     xmlChar *version;
6911     const xmlChar *encoding;
6912     int oldstate;
6913 
6914     /*
6915      * We know that '<?xml' is here.
6916      */
6917     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6918 	SKIP(5);
6919     } else {
6920 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6921 	return;
6922     }
6923 
6924     /* Avoid expansion of parameter entities when skipping blanks. */
6925     oldstate = ctxt->instate;
6926     ctxt->instate = XML_PARSER_START;
6927 
6928     if (SKIP_BLANKS == 0) {
6929 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6930 		       "Space needed after '<?xml'\n");
6931     }
6932 
6933     /*
6934      * We may have the VersionInfo here.
6935      */
6936     version = xmlParseVersionInfo(ctxt);
6937     if (version == NULL)
6938 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6939     else {
6940 	if (SKIP_BLANKS == 0) {
6941 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 		           "Space needed here\n");
6943 	}
6944     }
6945     ctxt->input->version = version;
6946 
6947     /*
6948      * We must have the encoding declaration
6949      */
6950     encoding = xmlParseEncodingDecl(ctxt);
6951     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6952 	/*
6953 	 * The XML REC instructs us to stop parsing right here
6954 	 */
6955         ctxt->instate = oldstate;
6956         return;
6957     }
6958     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6959 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6960 		       "Missing encoding in text declaration\n");
6961     }
6962 
6963     SKIP_BLANKS;
6964     if ((RAW == '?') && (NXT(1) == '>')) {
6965         SKIP(2);
6966     } else if (RAW == '>') {
6967         /* Deprecated old WD ... */
6968 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6969 	NEXT;
6970     } else {
6971 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6972 	MOVETO_ENDTAG(CUR_PTR);
6973 	NEXT;
6974     }
6975 
6976     ctxt->instate = oldstate;
6977 }
6978 
6979 /**
6980  * xmlParseExternalSubset:
6981  * @ctxt:  an XML parser context
6982  * @ExternalID: the external identifier
6983  * @SystemID: the system identifier (or URL)
6984  *
6985  * parse Markup declarations from an external subset
6986  *
6987  * [30] extSubset ::= textDecl? extSubsetDecl
6988  *
6989  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6990  */
6991 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6992 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6993                        const xmlChar *SystemID) {
6994     xmlDetectSAX2(ctxt);
6995     GROW;
6996 
6997     if ((ctxt->encoding == NULL) &&
6998         (ctxt->input->end - ctxt->input->cur >= 4)) {
6999         xmlChar start[4];
7000 	xmlCharEncoding enc;
7001 
7002 	start[0] = RAW;
7003 	start[1] = NXT(1);
7004 	start[2] = NXT(2);
7005 	start[3] = NXT(3);
7006 	enc = xmlDetectCharEncoding(start, 4);
7007 	if (enc != XML_CHAR_ENCODING_NONE)
7008 	    xmlSwitchEncoding(ctxt, enc);
7009     }
7010 
7011     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7012 	xmlParseTextDecl(ctxt);
7013 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7014 	    /*
7015 	     * The XML REC instructs us to stop parsing right here
7016 	     */
7017 	    xmlHaltParser(ctxt);
7018 	    return;
7019 	}
7020     }
7021     if (ctxt->myDoc == NULL) {
7022         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7023 	if (ctxt->myDoc == NULL) {
7024 	    xmlErrMemory(ctxt, "New Doc failed");
7025 	    return;
7026 	}
7027 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7028     }
7029     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7030         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7031 
7032     ctxt->instate = XML_PARSER_DTD;
7033     ctxt->external = 1;
7034     SKIP_BLANKS;
7035     while (((RAW == '<') && (NXT(1) == '?')) ||
7036            ((RAW == '<') && (NXT(1) == '!')) ||
7037 	   (RAW == '%')) {
7038 	const xmlChar *check = CUR_PTR;
7039 	unsigned int cons = ctxt->input->consumed;
7040 
7041 	GROW;
7042         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7043 	    xmlParseConditionalSections(ctxt);
7044 	} else
7045 	    xmlParseMarkupDecl(ctxt);
7046         SKIP_BLANKS;
7047 
7048 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7049 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7050 	    break;
7051 	}
7052     }
7053 
7054     if (RAW != 0) {
7055 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056     }
7057 
7058 }
7059 
7060 /**
7061  * xmlParseReference:
7062  * @ctxt:  an XML parser context
7063  *
7064  * parse and handle entity references in content, depending on the SAX
7065  * interface, this may end-up in a call to character() if this is a
7066  * CharRef, a predefined entity, if there is no reference() callback.
7067  * or if the parser was asked to switch to that mode.
7068  *
7069  * [67] Reference ::= EntityRef | CharRef
7070  */
7071 void
xmlParseReference(xmlParserCtxtPtr ctxt)7072 xmlParseReference(xmlParserCtxtPtr ctxt) {
7073     xmlEntityPtr ent;
7074     xmlChar *val;
7075     int was_checked;
7076     xmlNodePtr list = NULL;
7077     xmlParserErrors ret = XML_ERR_OK;
7078 
7079 
7080     if (RAW != '&')
7081         return;
7082 
7083     /*
7084      * Simple case of a CharRef
7085      */
7086     if (NXT(1) == '#') {
7087 	int i = 0;
7088 	xmlChar out[16];
7089 	int hex = NXT(2);
7090 	int value = xmlParseCharRef(ctxt);
7091 
7092 	if (value == 0)
7093 	    return;
7094 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7095 	    /*
7096 	     * So we are using non-UTF-8 buffers
7097 	     * Check that the char fit on 8bits, if not
7098 	     * generate a CharRef.
7099 	     */
7100 	    if (value <= 0xFF) {
7101 		out[0] = value;
7102 		out[1] = 0;
7103 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7104 		    (!ctxt->disableSAX))
7105 		    ctxt->sax->characters(ctxt->userData, out, 1);
7106 	    } else {
7107 		if ((hex == 'x') || (hex == 'X'))
7108 		    snprintf((char *)out, sizeof(out), "#x%X", value);
7109 		else
7110 		    snprintf((char *)out, sizeof(out), "#%d", value);
7111 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7112 		    (!ctxt->disableSAX))
7113 		    ctxt->sax->reference(ctxt->userData, out);
7114 	    }
7115 	} else {
7116 	    /*
7117 	     * Just encode the value in UTF-8
7118 	     */
7119 	    COPY_BUF(0 ,out, i, value);
7120 	    out[i] = 0;
7121 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7122 		(!ctxt->disableSAX))
7123 		ctxt->sax->characters(ctxt->userData, out, i);
7124 	}
7125 	return;
7126     }
7127 
7128     /*
7129      * We are seeing an entity reference
7130      */
7131     ent = xmlParseEntityRef(ctxt);
7132     if (ent == NULL) return;
7133     if (!ctxt->wellFormed)
7134 	return;
7135     was_checked = ent->checked;
7136 
7137     /* special case of predefined entities */
7138     if ((ent->name == NULL) ||
7139         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7140 	val = ent->content;
7141 	if (val == NULL) return;
7142 	/*
7143 	 * inline the entity.
7144 	 */
7145 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7146 	    (!ctxt->disableSAX))
7147 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7148 	return;
7149     }
7150 
7151     /*
7152      * The first reference to the entity trigger a parsing phase
7153      * where the ent->children is filled with the result from
7154      * the parsing.
7155      * Note: external parsed entities will not be loaded, it is not
7156      * required for a non-validating parser, unless the parsing option
7157      * of validating, or substituting entities were given. Doing so is
7158      * far more secure as the parser will only process data coming from
7159      * the document entity by default.
7160      */
7161     if (((ent->checked == 0) ||
7162          ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7163         ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7164          (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7165 	unsigned long oldnbent = ctxt->nbentities, diff;
7166 
7167 	/*
7168 	 * This is a bit hackish but this seems the best
7169 	 * way to make sure both SAX and DOM entity support
7170 	 * behaves okay.
7171 	 */
7172 	void *user_data;
7173 	if (ctxt->userData == ctxt)
7174 	    user_data = NULL;
7175 	else
7176 	    user_data = ctxt->userData;
7177 
7178 	/*
7179 	 * Check that this entity is well formed
7180 	 * 4.3.2: An internal general parsed entity is well-formed
7181 	 * if its replacement text matches the production labeled
7182 	 * content.
7183 	 */
7184 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7185 	    ctxt->depth++;
7186 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7187 	                                              user_data, &list);
7188 	    ctxt->depth--;
7189 
7190 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7191 	    ctxt->depth++;
7192 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7193 	                                   user_data, ctxt->depth, ent->URI,
7194 					   ent->ExternalID, &list);
7195 	    ctxt->depth--;
7196 	} else {
7197 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
7198 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7199 			 "invalid entity type found\n", NULL);
7200 	}
7201 
7202 	/*
7203 	 * Store the number of entities needing parsing for this entity
7204 	 * content and do checkings
7205 	 */
7206         diff = ctxt->nbentities - oldnbent + 1;
7207         if (diff > INT_MAX / 2)
7208             diff = INT_MAX / 2;
7209         ent->checked = diff * 2;
7210 	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7211 	    ent->checked |= 1;
7212 	if (ret == XML_ERR_ENTITY_LOOP) {
7213 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7214             xmlHaltParser(ctxt);
7215 	    xmlFreeNodeList(list);
7216 	    return;
7217 	}
7218 	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7219 	    xmlFreeNodeList(list);
7220 	    return;
7221 	}
7222 
7223 	if ((ret == XML_ERR_OK) && (list != NULL)) {
7224 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7225 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7226 		(ent->children == NULL)) {
7227 		ent->children = list;
7228                 /*
7229                  * Prune it directly in the generated document
7230                  * except for single text nodes.
7231                  */
7232                 if ((ctxt->replaceEntities == 0) ||
7233                     (ctxt->parseMode == XML_PARSE_READER) ||
7234                     ((list->type == XML_TEXT_NODE) &&
7235                      (list->next == NULL))) {
7236                     ent->owner = 1;
7237                     while (list != NULL) {
7238                         list->parent = (xmlNodePtr) ent;
7239                         xmlSetTreeDoc(list, ent->doc);
7240                         if (list->next == NULL)
7241                             ent->last = list;
7242                         list = list->next;
7243                     }
7244                     list = NULL;
7245                 } else {
7246                     ent->owner = 0;
7247                     while (list != NULL) {
7248                         list->parent = (xmlNodePtr) ctxt->node;
7249                         list->doc = ctxt->myDoc;
7250                         if (list->next == NULL)
7251                             ent->last = list;
7252                         list = list->next;
7253                     }
7254                     list = ent->children;
7255 #ifdef LIBXML_LEGACY_ENABLED
7256                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7257                         xmlAddEntityReference(ent, list, NULL);
7258 #endif /* LIBXML_LEGACY_ENABLED */
7259                 }
7260 	    } else {
7261 		xmlFreeNodeList(list);
7262 		list = NULL;
7263 	    }
7264 	} else if ((ret != XML_ERR_OK) &&
7265 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7266 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7267 		     "Entity '%s' failed to parse\n", ent->name);
7268             if (ent->content != NULL)
7269                 ent->content[0] = 0;
7270 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
7271 	} else if (list != NULL) {
7272 	    xmlFreeNodeList(list);
7273 	    list = NULL;
7274 	}
7275 	if (ent->checked == 0)
7276 	    ent->checked = 2;
7277 
7278         /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7279         was_checked = 0;
7280     } else if (ent->checked != 1) {
7281 	ctxt->nbentities += ent->checked / 2;
7282     }
7283 
7284     /*
7285      * Now that the entity content has been gathered
7286      * provide it to the application, this can take different forms based
7287      * on the parsing modes.
7288      */
7289     if (ent->children == NULL) {
7290 	/*
7291 	 * Probably running in SAX mode and the callbacks don't
7292 	 * build the entity content. So unless we already went
7293 	 * though parsing for first checking go though the entity
7294 	 * content to generate callbacks associated to the entity
7295 	 */
7296 	if (was_checked != 0) {
7297 	    void *user_data;
7298 	    /*
7299 	     * This is a bit hackish but this seems the best
7300 	     * way to make sure both SAX and DOM entity support
7301 	     * behaves okay.
7302 	     */
7303 	    if (ctxt->userData == ctxt)
7304 		user_data = NULL;
7305 	    else
7306 		user_data = ctxt->userData;
7307 
7308 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7309 		ctxt->depth++;
7310 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7311 				   ent->content, user_data, NULL);
7312 		ctxt->depth--;
7313 	    } else if (ent->etype ==
7314 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7315 		ctxt->depth++;
7316 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7317 			   ctxt->sax, user_data, ctxt->depth,
7318 			   ent->URI, ent->ExternalID, NULL);
7319 		ctxt->depth--;
7320 	    } else {
7321 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7322 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7323 			     "invalid entity type found\n", NULL);
7324 	    }
7325 	    if (ret == XML_ERR_ENTITY_LOOP) {
7326 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7327 		return;
7328 	    }
7329 	}
7330 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7331 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7332 	    /*
7333 	     * Entity reference callback comes second, it's somewhat
7334 	     * superfluous but a compatibility to historical behaviour
7335 	     */
7336 	    ctxt->sax->reference(ctxt->userData, ent->name);
7337 	}
7338 	return;
7339     }
7340 
7341     /*
7342      * If we didn't get any children for the entity being built
7343      */
7344     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7345 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7346 	/*
7347 	 * Create a node.
7348 	 */
7349 	ctxt->sax->reference(ctxt->userData, ent->name);
7350 	return;
7351     }
7352 
7353     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7354 	/*
7355 	 * There is a problem on the handling of _private for entities
7356 	 * (bug 155816): Should we copy the content of the field from
7357 	 * the entity (possibly overwriting some value set by the user
7358 	 * when a copy is created), should we leave it alone, or should
7359 	 * we try to take care of different situations?  The problem
7360 	 * is exacerbated by the usage of this field by the xmlReader.
7361 	 * To fix this bug, we look at _private on the created node
7362 	 * and, if it's NULL, we copy in whatever was in the entity.
7363 	 * If it's not NULL we leave it alone.  This is somewhat of a
7364 	 * hack - maybe we should have further tests to determine
7365 	 * what to do.
7366 	 */
7367 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7368 	    /*
7369 	     * Seems we are generating the DOM content, do
7370 	     * a simple tree copy for all references except the first
7371 	     * In the first occurrence list contains the replacement.
7372 	     */
7373 	    if (((list == NULL) && (ent->owner == 0)) ||
7374 		(ctxt->parseMode == XML_PARSE_READER)) {
7375 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7376 
7377 		/*
7378 		 * We are copying here, make sure there is no abuse
7379 		 */
7380 		ctxt->sizeentcopy += ent->length + 5;
7381 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7382 		    return;
7383 
7384 		/*
7385 		 * when operating on a reader, the entities definitions
7386 		 * are always owning the entities subtree.
7387 		if (ctxt->parseMode == XML_PARSE_READER)
7388 		    ent->owner = 1;
7389 		 */
7390 
7391 		cur = ent->children;
7392 		while (cur != NULL) {
7393 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7394 		    if (nw != NULL) {
7395 			if (nw->_private == NULL)
7396 			    nw->_private = cur->_private;
7397 			if (firstChild == NULL){
7398 			    firstChild = nw;
7399 			}
7400 			nw = xmlAddChild(ctxt->node, nw);
7401 		    }
7402 		    if (cur == ent->last) {
7403 			/*
7404 			 * needed to detect some strange empty
7405 			 * node cases in the reader tests
7406 			 */
7407 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7408 			    (nw != NULL) &&
7409 			    (nw->type == XML_ELEMENT_NODE) &&
7410 			    (nw->children == NULL))
7411 			    nw->extra = 1;
7412 
7413 			break;
7414 		    }
7415 		    cur = cur->next;
7416 		}
7417 #ifdef LIBXML_LEGACY_ENABLED
7418 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7419 		  xmlAddEntityReference(ent, firstChild, nw);
7420 #endif /* LIBXML_LEGACY_ENABLED */
7421 	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7422 		xmlNodePtr nw = NULL, cur, next, last,
7423 			   firstChild = NULL;
7424 
7425 		/*
7426 		 * We are copying here, make sure there is no abuse
7427 		 */
7428 		ctxt->sizeentcopy += ent->length + 5;
7429 		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7430 		    return;
7431 
7432 		/*
7433 		 * Copy the entity child list and make it the new
7434 		 * entity child list. The goal is to make sure any
7435 		 * ID or REF referenced will be the one from the
7436 		 * document content and not the entity copy.
7437 		 */
7438 		cur = ent->children;
7439 		ent->children = NULL;
7440 		last = ent->last;
7441 		ent->last = NULL;
7442 		while (cur != NULL) {
7443 		    next = cur->next;
7444 		    cur->next = NULL;
7445 		    cur->parent = NULL;
7446 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7447 		    if (nw != NULL) {
7448 			if (nw->_private == NULL)
7449 			    nw->_private = cur->_private;
7450 			if (firstChild == NULL){
7451 			    firstChild = cur;
7452 			}
7453 			xmlAddChild((xmlNodePtr) ent, nw);
7454 			xmlAddChild(ctxt->node, cur);
7455 		    }
7456 		    if (cur == last)
7457 			break;
7458 		    cur = next;
7459 		}
7460 		if (ent->owner == 0)
7461 		    ent->owner = 1;
7462 #ifdef LIBXML_LEGACY_ENABLED
7463 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7464 		  xmlAddEntityReference(ent, firstChild, nw);
7465 #endif /* LIBXML_LEGACY_ENABLED */
7466 	    } else {
7467 		const xmlChar *nbktext;
7468 
7469 		/*
7470 		 * the name change is to avoid coalescing of the
7471 		 * node with a possible previous text one which
7472 		 * would make ent->children a dangling pointer
7473 		 */
7474 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7475 					-1);
7476 		if (ent->children->type == XML_TEXT_NODE)
7477 		    ent->children->name = nbktext;
7478 		if ((ent->last != ent->children) &&
7479 		    (ent->last->type == XML_TEXT_NODE))
7480 		    ent->last->name = nbktext;
7481 		xmlAddChildList(ctxt->node, ent->children);
7482 	    }
7483 
7484 	    /*
7485 	     * This is to avoid a nasty side effect, see
7486 	     * characters() in SAX.c
7487 	     */
7488 	    ctxt->nodemem = 0;
7489 	    ctxt->nodelen = 0;
7490 	    return;
7491 	}
7492     }
7493 }
7494 
7495 /**
7496  * xmlParseEntityRef:
7497  * @ctxt:  an XML parser context
7498  *
7499  * parse ENTITY references declarations
7500  *
7501  * [68] EntityRef ::= '&' Name ';'
7502  *
7503  * [ WFC: Entity Declared ]
7504  * In a document without any DTD, a document with only an internal DTD
7505  * subset which contains no parameter entity references, or a document
7506  * with "standalone='yes'", the Name given in the entity reference
7507  * must match that in an entity declaration, except that well-formed
7508  * documents need not declare any of the following entities: amp, lt,
7509  * gt, apos, quot.  The declaration of a parameter entity must precede
7510  * any reference to it.  Similarly, the declaration of a general entity
7511  * must precede any reference to it which appears in a default value in an
7512  * attribute-list declaration. Note that if entities are declared in the
7513  * external subset or in external parameter entities, a non-validating
7514  * processor is not obligated to read and process their declarations;
7515  * for such documents, the rule that an entity must be declared is a
7516  * well-formedness constraint only if standalone='yes'.
7517  *
7518  * [ WFC: Parsed Entity ]
7519  * An entity reference must not contain the name of an unparsed entity
7520  *
7521  * Returns the xmlEntityPtr if found, or NULL otherwise.
7522  */
7523 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7524 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7525     const xmlChar *name;
7526     xmlEntityPtr ent = NULL;
7527 
7528     GROW;
7529     if (ctxt->instate == XML_PARSER_EOF)
7530         return(NULL);
7531 
7532     if (RAW != '&')
7533         return(NULL);
7534     NEXT;
7535     name = xmlParseName(ctxt);
7536     if (name == NULL) {
7537 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7538 		       "xmlParseEntityRef: no name\n");
7539         return(NULL);
7540     }
7541     if (RAW != ';') {
7542 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7543 	return(NULL);
7544     }
7545     NEXT;
7546 
7547     /*
7548      * Predefined entities override any extra definition
7549      */
7550     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7551         ent = xmlGetPredefinedEntity(name);
7552         if (ent != NULL)
7553             return(ent);
7554     }
7555 
7556     /*
7557      * Increase the number of entity references parsed
7558      */
7559     ctxt->nbentities++;
7560 
7561     /*
7562      * Ask first SAX for entity resolution, otherwise try the
7563      * entities which may have stored in the parser context.
7564      */
7565     if (ctxt->sax != NULL) {
7566 	if (ctxt->sax->getEntity != NULL)
7567 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7568 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7569 	    (ctxt->options & XML_PARSE_OLDSAX))
7570 	    ent = xmlGetPredefinedEntity(name);
7571 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7572 	    (ctxt->userData==ctxt)) {
7573 	    ent = xmlSAX2GetEntity(ctxt, name);
7574 	}
7575     }
7576     if (ctxt->instate == XML_PARSER_EOF)
7577 	return(NULL);
7578     /*
7579      * [ WFC: Entity Declared ]
7580      * In a document without any DTD, a document with only an
7581      * internal DTD subset which contains no parameter entity
7582      * references, or a document with "standalone='yes'", the
7583      * Name given in the entity reference must match that in an
7584      * entity declaration, except that well-formed documents
7585      * need not declare any of the following entities: amp, lt,
7586      * gt, apos, quot.
7587      * The declaration of a parameter entity must precede any
7588      * reference to it.
7589      * Similarly, the declaration of a general entity must
7590      * precede any reference to it which appears in a default
7591      * value in an attribute-list declaration. Note that if
7592      * entities are declared in the external subset or in
7593      * external parameter entities, a non-validating processor
7594      * is not obligated to read and process their declarations;
7595      * for such documents, the rule that an entity must be
7596      * declared is a well-formedness constraint only if
7597      * standalone='yes'.
7598      */
7599     if (ent == NULL) {
7600 	if ((ctxt->standalone == 1) ||
7601 	    ((ctxt->hasExternalSubset == 0) &&
7602 	     (ctxt->hasPErefs == 0))) {
7603 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 		     "Entity '%s' not defined\n", name);
7605 	} else {
7606 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 		     "Entity '%s' not defined\n", name);
7608 	    if ((ctxt->inSubset == 0) &&
7609 		(ctxt->sax != NULL) &&
7610 		(ctxt->sax->reference != NULL)) {
7611 		ctxt->sax->reference(ctxt->userData, name);
7612 	    }
7613 	}
7614 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7615 	ctxt->valid = 0;
7616     }
7617 
7618     /*
7619      * [ WFC: Parsed Entity ]
7620      * An entity reference must not contain the name of an
7621      * unparsed entity
7622      */
7623     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625 		 "Entity reference to unparsed entity %s\n", name);
7626     }
7627 
7628     /*
7629      * [ WFC: No External Entity References ]
7630      * Attribute values cannot contain direct or indirect
7631      * entity references to external entities.
7632      */
7633     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7634 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7635 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7636 	     "Attribute references external entity '%s'\n", name);
7637     }
7638     /*
7639      * [ WFC: No < in Attribute Values ]
7640      * The replacement text of any entity referred to directly or
7641      * indirectly in an attribute value (other than "&lt;") must
7642      * not contain a <.
7643      */
7644     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7645 	     (ent != NULL) &&
7646 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7647 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7648 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7649 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7650 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7651         }
7652     }
7653 
7654     /*
7655      * Internal check, no parameter entities here ...
7656      */
7657     else {
7658 	switch (ent->etype) {
7659 	    case XML_INTERNAL_PARAMETER_ENTITY:
7660 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7661 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7662 	     "Attempt to reference the parameter entity '%s'\n",
7663 			      name);
7664 	    break;
7665 	    default:
7666 	    break;
7667 	}
7668     }
7669 
7670     /*
7671      * [ WFC: No Recursion ]
7672      * A parsed entity must not contain a recursive reference
7673      * to itself, either directly or indirectly.
7674      * Done somewhere else
7675      */
7676     return(ent);
7677 }
7678 
7679 /**
7680  * xmlParseStringEntityRef:
7681  * @ctxt:  an XML parser context
7682  * @str:  a pointer to an index in the string
7683  *
7684  * parse ENTITY references declarations, but this version parses it from
7685  * a string value.
7686  *
7687  * [68] EntityRef ::= '&' Name ';'
7688  *
7689  * [ WFC: Entity Declared ]
7690  * In a document without any DTD, a document with only an internal DTD
7691  * subset which contains no parameter entity references, or a document
7692  * with "standalone='yes'", the Name given in the entity reference
7693  * must match that in an entity declaration, except that well-formed
7694  * documents need not declare any of the following entities: amp, lt,
7695  * gt, apos, quot.  The declaration of a parameter entity must precede
7696  * any reference to it.  Similarly, the declaration of a general entity
7697  * must precede any reference to it which appears in a default value in an
7698  * attribute-list declaration. Note that if entities are declared in the
7699  * external subset or in external parameter entities, a non-validating
7700  * processor is not obligated to read and process their declarations;
7701  * for such documents, the rule that an entity must be declared is a
7702  * well-formedness constraint only if standalone='yes'.
7703  *
7704  * [ WFC: Parsed Entity ]
7705  * An entity reference must not contain the name of an unparsed entity
7706  *
7707  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7708  * is updated to the current location in the string.
7709  */
7710 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7711 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7712     xmlChar *name;
7713     const xmlChar *ptr;
7714     xmlChar cur;
7715     xmlEntityPtr ent = NULL;
7716 
7717     if ((str == NULL) || (*str == NULL))
7718         return(NULL);
7719     ptr = *str;
7720     cur = *ptr;
7721     if (cur != '&')
7722 	return(NULL);
7723 
7724     ptr++;
7725     name = xmlParseStringName(ctxt, &ptr);
7726     if (name == NULL) {
7727 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7728 		       "xmlParseStringEntityRef: no name\n");
7729 	*str = ptr;
7730 	return(NULL);
7731     }
7732     if (*ptr != ';') {
7733 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7734         xmlFree(name);
7735 	*str = ptr;
7736 	return(NULL);
7737     }
7738     ptr++;
7739 
7740 
7741     /*
7742      * Predefined entities override any extra definition
7743      */
7744     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7745         ent = xmlGetPredefinedEntity(name);
7746         if (ent != NULL) {
7747             xmlFree(name);
7748             *str = ptr;
7749             return(ent);
7750         }
7751     }
7752 
7753     /*
7754      * Increase the number of entity references parsed
7755      */
7756     ctxt->nbentities++;
7757 
7758     /*
7759      * Ask first SAX for entity resolution, otherwise try the
7760      * entities which may have stored in the parser context.
7761      */
7762     if (ctxt->sax != NULL) {
7763 	if (ctxt->sax->getEntity != NULL)
7764 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7765 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7766 	    ent = xmlGetPredefinedEntity(name);
7767 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7768 	    ent = xmlSAX2GetEntity(ctxt, name);
7769 	}
7770     }
7771     if (ctxt->instate == XML_PARSER_EOF) {
7772 	xmlFree(name);
7773 	return(NULL);
7774     }
7775 
7776     /*
7777      * [ WFC: Entity Declared ]
7778      * In a document without any DTD, a document with only an
7779      * internal DTD subset which contains no parameter entity
7780      * references, or a document with "standalone='yes'", the
7781      * Name given in the entity reference must match that in an
7782      * entity declaration, except that well-formed documents
7783      * need not declare any of the following entities: amp, lt,
7784      * gt, apos, quot.
7785      * The declaration of a parameter entity must precede any
7786      * reference to it.
7787      * Similarly, the declaration of a general entity must
7788      * precede any reference to it which appears in a default
7789      * value in an attribute-list declaration. Note that if
7790      * entities are declared in the external subset or in
7791      * external parameter entities, a non-validating processor
7792      * is not obligated to read and process their declarations;
7793      * for such documents, the rule that an entity must be
7794      * declared is a well-formedness constraint only if
7795      * standalone='yes'.
7796      */
7797     if (ent == NULL) {
7798 	if ((ctxt->standalone == 1) ||
7799 	    ((ctxt->hasExternalSubset == 0) &&
7800 	     (ctxt->hasPErefs == 0))) {
7801 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7802 		     "Entity '%s' not defined\n", name);
7803 	} else {
7804 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7805 			  "Entity '%s' not defined\n",
7806 			  name);
7807 	}
7808 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7809 	/* TODO ? check regressions ctxt->valid = 0; */
7810     }
7811 
7812     /*
7813      * [ WFC: Parsed Entity ]
7814      * An entity reference must not contain the name of an
7815      * unparsed entity
7816      */
7817     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7818 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7819 		 "Entity reference to unparsed entity %s\n", name);
7820     }
7821 
7822     /*
7823      * [ WFC: No External Entity References ]
7824      * Attribute values cannot contain direct or indirect
7825      * entity references to external entities.
7826      */
7827     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7828 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7829 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7830 	 "Attribute references external entity '%s'\n", name);
7831     }
7832     /*
7833      * [ WFC: No < in Attribute Values ]
7834      * The replacement text of any entity referred to directly or
7835      * indirectly in an attribute value (other than "&lt;") must
7836      * not contain a <.
7837      */
7838     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7839 	     (ent != NULL) && (ent->content != NULL) &&
7840 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7841 	     (xmlStrchr(ent->content, '<'))) {
7842 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7843      "'<' in entity '%s' is not allowed in attributes values\n",
7844 			  name);
7845     }
7846 
7847     /*
7848      * Internal check, no parameter entities here ...
7849      */
7850     else {
7851 	switch (ent->etype) {
7852 	    case XML_INTERNAL_PARAMETER_ENTITY:
7853 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7854 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7855 	     "Attempt to reference the parameter entity '%s'\n",
7856 				  name);
7857 	    break;
7858 	    default:
7859 	    break;
7860 	}
7861     }
7862 
7863     /*
7864      * [ WFC: No Recursion ]
7865      * A parsed entity must not contain a recursive reference
7866      * to itself, either directly or indirectly.
7867      * Done somewhere else
7868      */
7869 
7870     xmlFree(name);
7871     *str = ptr;
7872     return(ent);
7873 }
7874 
7875 /**
7876  * xmlParsePEReference:
7877  * @ctxt:  an XML parser context
7878  *
7879  * parse PEReference declarations
7880  * The entity content is handled directly by pushing it's content as
7881  * a new input stream.
7882  *
7883  * [69] PEReference ::= '%' Name ';'
7884  *
7885  * [ WFC: No Recursion ]
7886  * A parsed entity must not contain a recursive
7887  * reference to itself, either directly or indirectly.
7888  *
7889  * [ WFC: Entity Declared ]
7890  * In a document without any DTD, a document with only an internal DTD
7891  * subset which contains no parameter entity references, or a document
7892  * with "standalone='yes'", ...  ... The declaration of a parameter
7893  * entity must precede any reference to it...
7894  *
7895  * [ VC: Entity Declared ]
7896  * In a document with an external subset or external parameter entities
7897  * with "standalone='no'", ...  ... The declaration of a parameter entity
7898  * must precede any reference to it...
7899  *
7900  * [ WFC: In DTD ]
7901  * Parameter-entity references may only appear in the DTD.
7902  * NOTE: misleading but this is handled.
7903  */
7904 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7905 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7906 {
7907     const xmlChar *name;
7908     xmlEntityPtr entity = NULL;
7909     xmlParserInputPtr input;
7910 
7911     if (RAW != '%')
7912         return;
7913     NEXT;
7914     name = xmlParseName(ctxt);
7915     if (name == NULL) {
7916 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7917 	return;
7918     }
7919     if (xmlParserDebugEntities)
7920 	xmlGenericError(xmlGenericErrorContext,
7921 		"PEReference: %s\n", name);
7922     if (RAW != ';') {
7923 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7924         return;
7925     }
7926 
7927     NEXT;
7928 
7929     /*
7930      * Increase the number of entity references parsed
7931      */
7932     ctxt->nbentities++;
7933 
7934     /*
7935      * Request the entity from SAX
7936      */
7937     if ((ctxt->sax != NULL) &&
7938 	(ctxt->sax->getParameterEntity != NULL))
7939 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7940     if (ctxt->instate == XML_PARSER_EOF)
7941 	return;
7942     if (entity == NULL) {
7943 	/*
7944 	 * [ WFC: Entity Declared ]
7945 	 * In a document without any DTD, a document with only an
7946 	 * internal DTD subset which contains no parameter entity
7947 	 * references, or a document with "standalone='yes'", ...
7948 	 * ... The declaration of a parameter entity must precede
7949 	 * any reference to it...
7950 	 */
7951 	if ((ctxt->standalone == 1) ||
7952 	    ((ctxt->hasExternalSubset == 0) &&
7953 	     (ctxt->hasPErefs == 0))) {
7954 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7955 			      "PEReference: %%%s; not found\n",
7956 			      name);
7957 	} else {
7958 	    /*
7959 	     * [ VC: Entity Declared ]
7960 	     * In a document with an external subset or external
7961 	     * parameter entities with "standalone='no'", ...
7962 	     * ... The declaration of a parameter entity must
7963 	     * precede any reference to it...
7964 	     */
7965             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7966                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967                                  "PEReference: %%%s; not found\n",
7968                                  name, NULL);
7969             } else
7970                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7971                               "PEReference: %%%s; not found\n",
7972                               name, NULL);
7973             ctxt->valid = 0;
7974 	}
7975 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7976     } else {
7977 	/*
7978 	 * Internal checking in case the entity quest barfed
7979 	 */
7980 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7981 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7982 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 		  "Internal: %%%s; is not a parameter entity\n",
7984 			  name, NULL);
7985 	} else {
7986             xmlChar start[4];
7987             xmlCharEncoding enc;
7988 
7989 	    if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7990 	        return;
7991 
7992 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7993 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7994 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7995 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7996 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7997 		(ctxt->replaceEntities == 0) &&
7998 		(ctxt->validate == 0))
7999 		return;
8000 
8001 	    input = xmlNewEntityInputStream(ctxt, entity);
8002 	    if (xmlPushInput(ctxt, input) < 0) {
8003                 xmlFreeInputStream(input);
8004 		return;
8005             }
8006 
8007 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8008                 /*
8009                  * Get the 4 first bytes and decode the charset
8010                  * if enc != XML_CHAR_ENCODING_NONE
8011                  * plug some encoding conversion routines.
8012                  * Note that, since we may have some non-UTF8
8013                  * encoding (like UTF16, bug 135229), the 'length'
8014                  * is not known, but we can calculate based upon
8015                  * the amount of data in the buffer.
8016                  */
8017                 GROW
8018                 if (ctxt->instate == XML_PARSER_EOF)
8019                     return;
8020                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8021                     start[0] = RAW;
8022                     start[1] = NXT(1);
8023                     start[2] = NXT(2);
8024                     start[3] = NXT(3);
8025                     enc = xmlDetectCharEncoding(start, 4);
8026                     if (enc != XML_CHAR_ENCODING_NONE) {
8027                         xmlSwitchEncoding(ctxt, enc);
8028                     }
8029                 }
8030 
8031                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8032                     (IS_BLANK_CH(NXT(5)))) {
8033                     xmlParseTextDecl(ctxt);
8034                 }
8035             }
8036 	}
8037     }
8038     ctxt->hasPErefs = 1;
8039 }
8040 
8041 /**
8042  * xmlLoadEntityContent:
8043  * @ctxt:  an XML parser context
8044  * @entity: an unloaded system entity
8045  *
8046  * Load the original content of the given system entity from the
8047  * ExternalID/SystemID given. This is to be used for Included in Literal
8048  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8049  *
8050  * Returns 0 in case of success and -1 in case of failure
8051  */
8052 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8053 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8054     xmlParserInputPtr input;
8055     xmlBufferPtr buf;
8056     int l, c;
8057     int count = 0;
8058 
8059     if ((ctxt == NULL) || (entity == NULL) ||
8060         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8061 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8062 	(entity->content != NULL)) {
8063 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8064 	            "xmlLoadEntityContent parameter error");
8065         return(-1);
8066     }
8067 
8068     if (xmlParserDebugEntities)
8069 	xmlGenericError(xmlGenericErrorContext,
8070 		"Reading %s entity content input\n", entity->name);
8071 
8072     buf = xmlBufferCreate();
8073     if (buf == NULL) {
8074 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8075 	            "xmlLoadEntityContent parameter error");
8076         return(-1);
8077     }
8078 
8079     input = xmlNewEntityInputStream(ctxt, entity);
8080     if (input == NULL) {
8081 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 	            "xmlLoadEntityContent input error");
8083 	xmlBufferFree(buf);
8084         return(-1);
8085     }
8086 
8087     /*
8088      * Push the entity as the current input, read char by char
8089      * saving to the buffer until the end of the entity or an error
8090      */
8091     if (xmlPushInput(ctxt, input) < 0) {
8092         xmlBufferFree(buf);
8093 	return(-1);
8094     }
8095 
8096     GROW;
8097     c = CUR_CHAR(l);
8098     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8099            (IS_CHAR(c))) {
8100         xmlBufferAdd(buf, ctxt->input->cur, l);
8101 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8102 	    count = 0;
8103 	    GROW;
8104             if (ctxt->instate == XML_PARSER_EOF) {
8105                 xmlBufferFree(buf);
8106                 return(-1);
8107             }
8108 	}
8109 	NEXTL(l);
8110 	c = CUR_CHAR(l);
8111 	if (c == 0) {
8112 	    count = 0;
8113 	    GROW;
8114             if (ctxt->instate == XML_PARSER_EOF) {
8115                 xmlBufferFree(buf);
8116                 return(-1);
8117             }
8118 	    c = CUR_CHAR(l);
8119 	}
8120     }
8121 
8122     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8123         xmlPopInput(ctxt);
8124     } else if (!IS_CHAR(c)) {
8125         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8126                           "xmlLoadEntityContent: invalid char value %d\n",
8127 	                  c);
8128 	xmlBufferFree(buf);
8129 	return(-1);
8130     }
8131     entity->content = buf->content;
8132     buf->content = NULL;
8133     xmlBufferFree(buf);
8134 
8135     return(0);
8136 }
8137 
8138 /**
8139  * xmlParseStringPEReference:
8140  * @ctxt:  an XML parser context
8141  * @str:  a pointer to an index in the string
8142  *
8143  * parse PEReference declarations
8144  *
8145  * [69] PEReference ::= '%' Name ';'
8146  *
8147  * [ WFC: No Recursion ]
8148  * A parsed entity must not contain a recursive
8149  * reference to itself, either directly or indirectly.
8150  *
8151  * [ WFC: Entity Declared ]
8152  * In a document without any DTD, a document with only an internal DTD
8153  * subset which contains no parameter entity references, or a document
8154  * with "standalone='yes'", ...  ... The declaration of a parameter
8155  * entity must precede any reference to it...
8156  *
8157  * [ VC: Entity Declared ]
8158  * In a document with an external subset or external parameter entities
8159  * with "standalone='no'", ...  ... The declaration of a parameter entity
8160  * must precede any reference to it...
8161  *
8162  * [ WFC: In DTD ]
8163  * Parameter-entity references may only appear in the DTD.
8164  * NOTE: misleading but this is handled.
8165  *
8166  * Returns the string of the entity content.
8167  *         str is updated to the current value of the index
8168  */
8169 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8170 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8171     const xmlChar *ptr;
8172     xmlChar cur;
8173     xmlChar *name;
8174     xmlEntityPtr entity = NULL;
8175 
8176     if ((str == NULL) || (*str == NULL)) return(NULL);
8177     ptr = *str;
8178     cur = *ptr;
8179     if (cur != '%')
8180         return(NULL);
8181     ptr++;
8182     name = xmlParseStringName(ctxt, &ptr);
8183     if (name == NULL) {
8184 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8185 		       "xmlParseStringPEReference: no name\n");
8186 	*str = ptr;
8187 	return(NULL);
8188     }
8189     cur = *ptr;
8190     if (cur != ';') {
8191 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8192 	xmlFree(name);
8193 	*str = ptr;
8194 	return(NULL);
8195     }
8196     ptr++;
8197 
8198     /*
8199      * Increase the number of entity references parsed
8200      */
8201     ctxt->nbentities++;
8202 
8203     /*
8204      * Request the entity from SAX
8205      */
8206     if ((ctxt->sax != NULL) &&
8207 	(ctxt->sax->getParameterEntity != NULL))
8208 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8209     if (ctxt->instate == XML_PARSER_EOF) {
8210 	xmlFree(name);
8211 	*str = ptr;
8212 	return(NULL);
8213     }
8214     if (entity == NULL) {
8215 	/*
8216 	 * [ WFC: Entity Declared ]
8217 	 * In a document without any DTD, a document with only an
8218 	 * internal DTD subset which contains no parameter entity
8219 	 * references, or a document with "standalone='yes'", ...
8220 	 * ... The declaration of a parameter entity must precede
8221 	 * any reference to it...
8222 	 */
8223 	if ((ctxt->standalone == 1) ||
8224 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8225 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8226 		 "PEReference: %%%s; not found\n", name);
8227 	} else {
8228 	    /*
8229 	     * [ VC: Entity Declared ]
8230 	     * In a document with an external subset or external
8231 	     * parameter entities with "standalone='no'", ...
8232 	     * ... The declaration of a parameter entity must
8233 	     * precede any reference to it...
8234 	     */
8235 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236 			  "PEReference: %%%s; not found\n",
8237 			  name, NULL);
8238 	    ctxt->valid = 0;
8239 	}
8240 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8241     } else {
8242 	/*
8243 	 * Internal checking in case the entity quest barfed
8244 	 */
8245 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8246 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8247 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248 			  "%%%s; is not a parameter entity\n",
8249 			  name, NULL);
8250 	}
8251     }
8252     ctxt->hasPErefs = 1;
8253     xmlFree(name);
8254     *str = ptr;
8255     return(entity);
8256 }
8257 
8258 /**
8259  * xmlParseDocTypeDecl:
8260  * @ctxt:  an XML parser context
8261  *
8262  * parse a DOCTYPE declaration
8263  *
8264  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8265  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266  *
8267  * [ VC: Root Element Type ]
8268  * The Name in the document type declaration must match the element
8269  * type of the root element.
8270  */
8271 
8272 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8273 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8274     const xmlChar *name = NULL;
8275     xmlChar *ExternalID = NULL;
8276     xmlChar *URI = NULL;
8277 
8278     /*
8279      * We know that '<!DOCTYPE' has been detected.
8280      */
8281     SKIP(9);
8282 
8283     SKIP_BLANKS;
8284 
8285     /*
8286      * Parse the DOCTYPE name.
8287      */
8288     name = xmlParseName(ctxt);
8289     if (name == NULL) {
8290 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8291 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8292     }
8293     ctxt->intSubName = name;
8294 
8295     SKIP_BLANKS;
8296 
8297     /*
8298      * Check for SystemID and ExternalID
8299      */
8300     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8301 
8302     if ((URI != NULL) || (ExternalID != NULL)) {
8303         ctxt->hasExternalSubset = 1;
8304     }
8305     ctxt->extSubURI = URI;
8306     ctxt->extSubSystem = ExternalID;
8307 
8308     SKIP_BLANKS;
8309 
8310     /*
8311      * Create and update the internal subset.
8312      */
8313     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8314 	(!ctxt->disableSAX))
8315 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8316     if (ctxt->instate == XML_PARSER_EOF)
8317 	return;
8318 
8319     /*
8320      * Is there any internal subset declarations ?
8321      * they are handled separately in xmlParseInternalSubset()
8322      */
8323     if (RAW == '[')
8324 	return;
8325 
8326     /*
8327      * We should be at the end of the DOCTYPE declaration.
8328      */
8329     if (RAW != '>') {
8330 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8331     }
8332     NEXT;
8333 }
8334 
8335 /**
8336  * xmlParseInternalSubset:
8337  * @ctxt:  an XML parser context
8338  *
8339  * parse the internal subset declaration
8340  *
8341  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8342  */
8343 
8344 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8345 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8346     /*
8347      * Is there any DTD definition ?
8348      */
8349     if (RAW == '[') {
8350         int baseInputNr = ctxt->inputNr;
8351         ctxt->instate = XML_PARSER_DTD;
8352         NEXT;
8353 	/*
8354 	 * Parse the succession of Markup declarations and
8355 	 * PEReferences.
8356 	 * Subsequence (markupdecl | PEReference | S)*
8357 	 */
8358 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8359                (ctxt->instate != XML_PARSER_EOF)) {
8360 	    const xmlChar *check = CUR_PTR;
8361 	    unsigned int cons = ctxt->input->consumed;
8362 
8363 	    SKIP_BLANKS;
8364 	    xmlParseMarkupDecl(ctxt);
8365 	    xmlParsePEReference(ctxt);
8366 
8367             /*
8368              * Conditional sections are allowed from external entities included
8369              * by PE References in the internal subset.
8370              */
8371             if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8372                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8373                 xmlParseConditionalSections(ctxt);
8374             }
8375 
8376 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8377 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8378 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8379                 if (ctxt->inputNr > baseInputNr)
8380                     xmlPopInput(ctxt);
8381                 else
8382 		    break;
8383 	    }
8384 	}
8385 	if (RAW == ']') {
8386 	    NEXT;
8387 	    SKIP_BLANKS;
8388 	}
8389     }
8390 
8391     /*
8392      * We should be at the end of the DOCTYPE declaration.
8393      */
8394     if (RAW != '>') {
8395 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8396 	return;
8397     }
8398     NEXT;
8399 }
8400 
8401 #ifdef LIBXML_SAX1_ENABLED
8402 /**
8403  * xmlParseAttribute:
8404  * @ctxt:  an XML parser context
8405  * @value:  a xmlChar ** used to store the value of the attribute
8406  *
8407  * parse an attribute
8408  *
8409  * [41] Attribute ::= Name Eq AttValue
8410  *
8411  * [ WFC: No External Entity References ]
8412  * Attribute values cannot contain direct or indirect entity references
8413  * to external entities.
8414  *
8415  * [ WFC: No < in Attribute Values ]
8416  * The replacement text of any entity referred to directly or indirectly in
8417  * an attribute value (other than "&lt;") must not contain a <.
8418  *
8419  * [ VC: Attribute Value Type ]
8420  * The attribute must have been declared; the value must be of the type
8421  * declared for it.
8422  *
8423  * [25] Eq ::= S? '=' S?
8424  *
8425  * With namespace:
8426  *
8427  * [NS 11] Attribute ::= QName Eq AttValue
8428  *
8429  * Also the case QName == xmlns:??? is handled independently as a namespace
8430  * definition.
8431  *
8432  * Returns the attribute name, and the value in *value.
8433  */
8434 
8435 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8436 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8437     const xmlChar *name;
8438     xmlChar *val;
8439 
8440     *value = NULL;
8441     GROW;
8442     name = xmlParseName(ctxt);
8443     if (name == NULL) {
8444 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8445 	               "error parsing attribute name\n");
8446         return(NULL);
8447     }
8448 
8449     /*
8450      * read the value
8451      */
8452     SKIP_BLANKS;
8453     if (RAW == '=') {
8454         NEXT;
8455 	SKIP_BLANKS;
8456 	val = xmlParseAttValue(ctxt);
8457 	ctxt->instate = XML_PARSER_CONTENT;
8458     } else {
8459 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8460 	       "Specification mandates value for attribute %s\n", name);
8461 	return(NULL);
8462     }
8463 
8464     /*
8465      * Check that xml:lang conforms to the specification
8466      * No more registered as an error, just generate a warning now
8467      * since this was deprecated in XML second edition
8468      */
8469     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8470 	if (!xmlCheckLanguageID(val)) {
8471 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8472 		          "Malformed value for xml:lang : %s\n",
8473 			  val, NULL);
8474 	}
8475     }
8476 
8477     /*
8478      * Check that xml:space conforms to the specification
8479      */
8480     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8481 	if (xmlStrEqual(val, BAD_CAST "default"))
8482 	    *(ctxt->space) = 0;
8483 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8484 	    *(ctxt->space) = 1;
8485 	else {
8486 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8487 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8488                                  val, NULL);
8489 	}
8490     }
8491 
8492     *value = val;
8493     return(name);
8494 }
8495 
8496 /**
8497  * xmlParseStartTag:
8498  * @ctxt:  an XML parser context
8499  *
8500  * parse a start of tag either for rule element or
8501  * EmptyElement. In both case we don't parse the tag closing chars.
8502  *
8503  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8504  *
8505  * [ WFC: Unique Att Spec ]
8506  * No attribute name may appear more than once in the same start-tag or
8507  * empty-element tag.
8508  *
8509  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8510  *
8511  * [ WFC: Unique Att Spec ]
8512  * No attribute name may appear more than once in the same start-tag or
8513  * empty-element tag.
8514  *
8515  * With namespace:
8516  *
8517  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8518  *
8519  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8520  *
8521  * Returns the element name parsed
8522  */
8523 
8524 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8525 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8526     const xmlChar *name;
8527     const xmlChar *attname;
8528     xmlChar *attvalue;
8529     const xmlChar **atts = ctxt->atts;
8530     int nbatts = 0;
8531     int maxatts = ctxt->maxatts;
8532     int i;
8533 
8534     if (RAW != '<') return(NULL);
8535     NEXT1;
8536 
8537     name = xmlParseName(ctxt);
8538     if (name == NULL) {
8539 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8540 	     "xmlParseStartTag: invalid element name\n");
8541         return(NULL);
8542     }
8543 
8544     /*
8545      * Now parse the attributes, it ends up with the ending
8546      *
8547      * (S Attribute)* S?
8548      */
8549     SKIP_BLANKS;
8550     GROW;
8551 
8552     while (((RAW != '>') &&
8553 	   ((RAW != '/') || (NXT(1) != '>')) &&
8554 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8555 	const xmlChar *q = CUR_PTR;
8556 	unsigned int cons = ctxt->input->consumed;
8557 
8558 	attname = xmlParseAttribute(ctxt, &attvalue);
8559         if ((attname != NULL) && (attvalue != NULL)) {
8560 	    /*
8561 	     * [ WFC: Unique Att Spec ]
8562 	     * No attribute name may appear more than once in the same
8563 	     * start-tag or empty-element tag.
8564 	     */
8565 	    for (i = 0; i < nbatts;i += 2) {
8566 	        if (xmlStrEqual(atts[i], attname)) {
8567 		    xmlErrAttributeDup(ctxt, NULL, attname);
8568 		    xmlFree(attvalue);
8569 		    goto failed;
8570 		}
8571 	    }
8572 	    /*
8573 	     * Add the pair to atts
8574 	     */
8575 	    if (atts == NULL) {
8576 	        maxatts = 22; /* allow for 10 attrs by default */
8577 	        atts = (const xmlChar **)
8578 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8579 		if (atts == NULL) {
8580 		    xmlErrMemory(ctxt, NULL);
8581 		    if (attvalue != NULL)
8582 			xmlFree(attvalue);
8583 		    goto failed;
8584 		}
8585 		ctxt->atts = atts;
8586 		ctxt->maxatts = maxatts;
8587 	    } else if (nbatts + 4 > maxatts) {
8588 	        const xmlChar **n;
8589 
8590 	        maxatts *= 2;
8591 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8592 					     maxatts * sizeof(const xmlChar *));
8593 		if (n == NULL) {
8594 		    xmlErrMemory(ctxt, NULL);
8595 		    if (attvalue != NULL)
8596 			xmlFree(attvalue);
8597 		    goto failed;
8598 		}
8599 		atts = n;
8600 		ctxt->atts = atts;
8601 		ctxt->maxatts = maxatts;
8602 	    }
8603 	    atts[nbatts++] = attname;
8604 	    atts[nbatts++] = attvalue;
8605 	    atts[nbatts] = NULL;
8606 	    atts[nbatts + 1] = NULL;
8607 	} else {
8608 	    if (attvalue != NULL)
8609 		xmlFree(attvalue);
8610 	}
8611 
8612 failed:
8613 
8614 	GROW
8615 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8616 	    break;
8617 	if (SKIP_BLANKS == 0) {
8618 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8619 			   "attributes construct error\n");
8620 	}
8621         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8622             (attname == NULL) && (attvalue == NULL)) {
8623 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8624 			   "xmlParseStartTag: problem parsing attributes\n");
8625 	    break;
8626 	}
8627 	SHRINK;
8628         GROW;
8629     }
8630 
8631     /*
8632      * SAX: Start of Element !
8633      */
8634     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8635 	(!ctxt->disableSAX)) {
8636 	if (nbatts > 0)
8637 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8638 	else
8639 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8640     }
8641 
8642     if (atts != NULL) {
8643         /* Free only the content strings */
8644         for (i = 1;i < nbatts;i+=2)
8645 	    if (atts[i] != NULL)
8646 	       xmlFree((xmlChar *) atts[i]);
8647     }
8648     return(name);
8649 }
8650 
8651 /**
8652  * xmlParseEndTag1:
8653  * @ctxt:  an XML parser context
8654  * @line:  line of the start tag
8655  * @nsNr:  number of namespaces on the start tag
8656  *
8657  * parse an end of tag
8658  *
8659  * [42] ETag ::= '</' Name S? '>'
8660  *
8661  * With namespace
8662  *
8663  * [NS 9] ETag ::= '</' QName S? '>'
8664  */
8665 
8666 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8667 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8668     const xmlChar *name;
8669 
8670     GROW;
8671     if ((RAW != '<') || (NXT(1) != '/')) {
8672 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8673 		       "xmlParseEndTag: '</' not found\n");
8674 	return;
8675     }
8676     SKIP(2);
8677 
8678     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8679 
8680     /*
8681      * We should definitely be at the ending "S? '>'" part
8682      */
8683     GROW;
8684     SKIP_BLANKS;
8685     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8686 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8687     } else
8688 	NEXT1;
8689 
8690     /*
8691      * [ WFC: Element Type Match ]
8692      * The Name in an element's end-tag must match the element type in the
8693      * start-tag.
8694      *
8695      */
8696     if (name != (xmlChar*)1) {
8697         if (name == NULL) name = BAD_CAST "unparsable";
8698         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8699 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8700 		                ctxt->name, line, name);
8701     }
8702 
8703     /*
8704      * SAX: End of Tag
8705      */
8706     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8707 	(!ctxt->disableSAX))
8708         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8709 
8710     namePop(ctxt);
8711     spacePop(ctxt);
8712     return;
8713 }
8714 
8715 /**
8716  * xmlParseEndTag:
8717  * @ctxt:  an XML parser context
8718  *
8719  * parse an end of tag
8720  *
8721  * [42] ETag ::= '</' Name S? '>'
8722  *
8723  * With namespace
8724  *
8725  * [NS 9] ETag ::= '</' QName S? '>'
8726  */
8727 
8728 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8729 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8730     xmlParseEndTag1(ctxt, 0);
8731 }
8732 #endif /* LIBXML_SAX1_ENABLED */
8733 
8734 /************************************************************************
8735  *									*
8736  *		      SAX 2 specific operations				*
8737  *									*
8738  ************************************************************************/
8739 
8740 /*
8741  * xmlGetNamespace:
8742  * @ctxt:  an XML parser context
8743  * @prefix:  the prefix to lookup
8744  *
8745  * Lookup the namespace name for the @prefix (which ca be NULL)
8746  * The prefix must come from the @ctxt->dict dictionary
8747  *
8748  * Returns the namespace name or NULL if not bound
8749  */
8750 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8751 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8752     int i;
8753 
8754     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8755     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8756         if (ctxt->nsTab[i] == prefix) {
8757 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8758 	        return(NULL);
8759 	    return(ctxt->nsTab[i + 1]);
8760 	}
8761     return(NULL);
8762 }
8763 
8764 /**
8765  * xmlParseQName:
8766  * @ctxt:  an XML parser context
8767  * @prefix:  pointer to store the prefix part
8768  *
8769  * parse an XML Namespace QName
8770  *
8771  * [6]  QName  ::= (Prefix ':')? LocalPart
8772  * [7]  Prefix  ::= NCName
8773  * [8]  LocalPart  ::= NCName
8774  *
8775  * Returns the Name parsed or NULL
8776  */
8777 
8778 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8779 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8780     const xmlChar *l, *p;
8781 
8782     GROW;
8783 
8784     l = xmlParseNCName(ctxt);
8785     if (l == NULL) {
8786         if (CUR == ':') {
8787 	    l = xmlParseName(ctxt);
8788 	    if (l != NULL) {
8789 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8790 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8791 		*prefix = NULL;
8792 		return(l);
8793 	    }
8794 	}
8795         return(NULL);
8796     }
8797     if (CUR == ':') {
8798         NEXT;
8799 	p = l;
8800 	l = xmlParseNCName(ctxt);
8801 	if (l == NULL) {
8802 	    xmlChar *tmp;
8803 
8804             if (ctxt->instate == XML_PARSER_EOF)
8805                 return(NULL);
8806             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8807 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8808 	    l = xmlParseNmtoken(ctxt);
8809 	    if (l == NULL) {
8810                 if (ctxt->instate == XML_PARSER_EOF)
8811                     return(NULL);
8812 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8813             } else {
8814 		tmp = xmlBuildQName(l, p, NULL, 0);
8815 		xmlFree((char *)l);
8816 	    }
8817 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8818 	    if (tmp != NULL) xmlFree(tmp);
8819 	    *prefix = NULL;
8820 	    return(p);
8821 	}
8822 	if (CUR == ':') {
8823 	    xmlChar *tmp;
8824 
8825             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8826 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8827 	    NEXT;
8828 	    tmp = (xmlChar *) xmlParseName(ctxt);
8829 	    if (tmp != NULL) {
8830 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8831 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8832 		if (tmp != NULL) xmlFree(tmp);
8833 		*prefix = p;
8834 		return(l);
8835 	    }
8836             if (ctxt->instate == XML_PARSER_EOF)
8837                 return(NULL);
8838 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8839 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8840 	    if (tmp != NULL) xmlFree(tmp);
8841 	    *prefix = p;
8842 	    return(l);
8843 	}
8844 	*prefix = p;
8845     } else
8846         *prefix = NULL;
8847     return(l);
8848 }
8849 
8850 /**
8851  * xmlParseQNameAndCompare:
8852  * @ctxt:  an XML parser context
8853  * @name:  the localname
8854  * @prefix:  the prefix, if any.
8855  *
8856  * parse an XML name and compares for match
8857  * (specialized for endtag parsing)
8858  *
8859  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8860  * and the name for mismatch
8861  */
8862 
8863 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8864 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8865                         xmlChar const *prefix) {
8866     const xmlChar *cmp;
8867     const xmlChar *in;
8868     const xmlChar *ret;
8869     const xmlChar *prefix2;
8870 
8871     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8872 
8873     GROW;
8874     in = ctxt->input->cur;
8875 
8876     cmp = prefix;
8877     while (*in != 0 && *in == *cmp) {
8878 	++in;
8879 	++cmp;
8880     }
8881     if ((*cmp == 0) && (*in == ':')) {
8882         in++;
8883 	cmp = name;
8884 	while (*in != 0 && *in == *cmp) {
8885 	    ++in;
8886 	    ++cmp;
8887 	}
8888 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8889 	    /* success */
8890             ctxt->input->col += in - ctxt->input->cur;
8891 	    ctxt->input->cur = in;
8892 	    return((const xmlChar*) 1);
8893 	}
8894     }
8895     /*
8896      * all strings coms from the dictionary, equality can be done directly
8897      */
8898     ret = xmlParseQName (ctxt, &prefix2);
8899     if ((ret == name) && (prefix == prefix2))
8900 	return((const xmlChar*) 1);
8901     return ret;
8902 }
8903 
8904 /**
8905  * xmlParseAttValueInternal:
8906  * @ctxt:  an XML parser context
8907  * @len:  attribute len result
8908  * @alloc:  whether the attribute was reallocated as a new string
8909  * @normalize:  if 1 then further non-CDATA normalization must be done
8910  *
8911  * parse a value for an attribute.
8912  * NOTE: if no normalization is needed, the routine will return pointers
8913  *       directly from the data buffer.
8914  *
8915  * 3.3.3 Attribute-Value Normalization:
8916  * Before the value of an attribute is passed to the application or
8917  * checked for validity, the XML processor must normalize it as follows:
8918  * - a character reference is processed by appending the referenced
8919  *   character to the attribute value
8920  * - an entity reference is processed by recursively processing the
8921  *   replacement text of the entity
8922  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8923  *   appending #x20 to the normalized value, except that only a single
8924  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8925  *   parsed entity or the literal entity value of an internal parsed entity
8926  * - other characters are processed by appending them to the normalized value
8927  * If the declared value is not CDATA, then the XML processor must further
8928  * process the normalized attribute value by discarding any leading and
8929  * trailing space (#x20) characters, and by replacing sequences of space
8930  * (#x20) characters by a single space (#x20) character.
8931  * All attributes for which no declaration has been read should be treated
8932  * by a non-validating parser as if declared CDATA.
8933  *
8934  * Returns the AttValue parsed or NULL. The value has to be freed by the
8935  *     caller if it was copied, this can be detected by val[*len] == 0.
8936  */
8937 
8938 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8939     const xmlChar *oldbase = ctxt->input->base;\
8940     GROW;\
8941     if (ctxt->instate == XML_PARSER_EOF)\
8942         return(NULL);\
8943     if (oldbase != ctxt->input->base) {\
8944         ptrdiff_t delta = ctxt->input->base - oldbase;\
8945         start = start + delta;\
8946         in = in + delta;\
8947     }\
8948     end = ctxt->input->end;
8949 
8950 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8951 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8952                          int normalize)
8953 {
8954     xmlChar limit = 0;
8955     const xmlChar *in = NULL, *start, *end, *last;
8956     xmlChar *ret = NULL;
8957     int line, col;
8958 
8959     GROW;
8960     in = (xmlChar *) CUR_PTR;
8961     line = ctxt->input->line;
8962     col = ctxt->input->col;
8963     if (*in != '"' && *in != '\'') {
8964         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8965         return (NULL);
8966     }
8967     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8968 
8969     /*
8970      * try to handle in this routine the most common case where no
8971      * allocation of a new string is required and where content is
8972      * pure ASCII.
8973      */
8974     limit = *in++;
8975     col++;
8976     end = ctxt->input->end;
8977     start = in;
8978     if (in >= end) {
8979         GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8980     }
8981     if (normalize) {
8982         /*
8983 	 * Skip any leading spaces
8984 	 */
8985 	while ((in < end) && (*in != limit) &&
8986 	       ((*in == 0x20) || (*in == 0x9) ||
8987 	        (*in == 0xA) || (*in == 0xD))) {
8988 	    if (*in == 0xA) {
8989 	        line++; col = 1;
8990 	    } else {
8991 	        col++;
8992 	    }
8993 	    in++;
8994 	    start = in;
8995 	    if (in >= end) {
8996                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8997                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9000                                    "AttValue length too long\n");
9001                     return(NULL);
9002                 }
9003 	    }
9004 	}
9005 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9006 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9007 	    col++;
9008 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
9009 	    if (in >= end) {
9010                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9011                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9012                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9013                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9014                                    "AttValue length too long\n");
9015                     return(NULL);
9016                 }
9017 	    }
9018 	}
9019 	last = in;
9020 	/*
9021 	 * skip the trailing blanks
9022 	 */
9023 	while ((last[-1] == 0x20) && (last > start)) last--;
9024 	while ((in < end) && (*in != limit) &&
9025 	       ((*in == 0x20) || (*in == 0x9) ||
9026 	        (*in == 0xA) || (*in == 0xD))) {
9027 	    if (*in == 0xA) {
9028 	        line++, col = 1;
9029 	    } else {
9030 	        col++;
9031 	    }
9032 	    in++;
9033 	    if (in >= end) {
9034 		const xmlChar *oldbase = ctxt->input->base;
9035 		GROW;
9036                 if (ctxt->instate == XML_PARSER_EOF)
9037                     return(NULL);
9038 		if (oldbase != ctxt->input->base) {
9039 		    ptrdiff_t delta = ctxt->input->base - oldbase;
9040 		    start = start + delta;
9041 		    in = in + delta;
9042 		    last = last + delta;
9043 		}
9044 		end = ctxt->input->end;
9045                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9046                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9047                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9048                                    "AttValue length too long\n");
9049                     return(NULL);
9050                 }
9051 	    }
9052 	}
9053         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9054             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9055             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9056                            "AttValue length too long\n");
9057             return(NULL);
9058         }
9059 	if (*in != limit) goto need_complex;
9060     } else {
9061 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9062 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9063 	    in++;
9064 	    col++;
9065 	    if (in >= end) {
9066                 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9067                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9068                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9069                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9070                                    "AttValue length too long\n");
9071                     return(NULL);
9072                 }
9073 	    }
9074 	}
9075 	last = in;
9076         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9077             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9078             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9079                            "AttValue length too long\n");
9080             return(NULL);
9081         }
9082 	if (*in != limit) goto need_complex;
9083     }
9084     in++;
9085     col++;
9086     if (len != NULL) {
9087         *len = last - start;
9088         ret = (xmlChar *) start;
9089     } else {
9090         if (alloc) *alloc = 1;
9091         ret = xmlStrndup(start, last - start);
9092     }
9093     CUR_PTR = in;
9094     ctxt->input->line = line;
9095     ctxt->input->col = col;
9096     if (alloc) *alloc = 0;
9097     return ret;
9098 need_complex:
9099     if (alloc) *alloc = 1;
9100     return xmlParseAttValueComplex(ctxt, len, normalize);
9101 }
9102 
9103 /**
9104  * xmlParseAttribute2:
9105  * @ctxt:  an XML parser context
9106  * @pref:  the element prefix
9107  * @elem:  the element name
9108  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9109  * @value:  a xmlChar ** used to store the value of the attribute
9110  * @len:  an int * to save the length of the attribute
9111  * @alloc:  an int * to indicate if the attribute was allocated
9112  *
9113  * parse an attribute in the new SAX2 framework.
9114  *
9115  * Returns the attribute name, and the value in *value, .
9116  */
9117 
9118 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9119 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9120                    const xmlChar * pref, const xmlChar * elem,
9121                    const xmlChar ** prefix, xmlChar ** value,
9122                    int *len, int *alloc)
9123 {
9124     const xmlChar *name;
9125     xmlChar *val, *internal_val = NULL;
9126     int normalize = 0;
9127 
9128     *value = NULL;
9129     GROW;
9130     name = xmlParseQName(ctxt, prefix);
9131     if (name == NULL) {
9132         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9133                        "error parsing attribute name\n");
9134         return (NULL);
9135     }
9136 
9137     /*
9138      * get the type if needed
9139      */
9140     if (ctxt->attsSpecial != NULL) {
9141         int type;
9142 
9143         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9144                                                  pref, elem, *prefix, name);
9145         if (type != 0)
9146             normalize = 1;
9147     }
9148 
9149     /*
9150      * read the value
9151      */
9152     SKIP_BLANKS;
9153     if (RAW == '=') {
9154         NEXT;
9155         SKIP_BLANKS;
9156         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9157 	if (normalize) {
9158 	    /*
9159 	     * Sometimes a second normalisation pass for spaces is needed
9160 	     * but that only happens if charrefs or entities references
9161 	     * have been used in the attribute value, i.e. the attribute
9162 	     * value have been extracted in an allocated string already.
9163 	     */
9164 	    if (*alloc) {
9165 	        const xmlChar *val2;
9166 
9167 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9168 		if ((val2 != NULL) && (val2 != val)) {
9169 		    xmlFree(val);
9170 		    val = (xmlChar *) val2;
9171 		}
9172 	    }
9173 	}
9174         ctxt->instate = XML_PARSER_CONTENT;
9175     } else {
9176         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9177                           "Specification mandates value for attribute %s\n",
9178                           name);
9179         return (NULL);
9180     }
9181 
9182     if (*prefix == ctxt->str_xml) {
9183         /*
9184          * Check that xml:lang conforms to the specification
9185          * No more registered as an error, just generate a warning now
9186          * since this was deprecated in XML second edition
9187          */
9188         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9189             internal_val = xmlStrndup(val, *len);
9190             if (!xmlCheckLanguageID(internal_val)) {
9191                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9192                               "Malformed value for xml:lang : %s\n",
9193                               internal_val, NULL);
9194             }
9195         }
9196 
9197         /*
9198          * Check that xml:space conforms to the specification
9199          */
9200         if (xmlStrEqual(name, BAD_CAST "space")) {
9201             internal_val = xmlStrndup(val, *len);
9202             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9203                 *(ctxt->space) = 0;
9204             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9205                 *(ctxt->space) = 1;
9206             else {
9207                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9208                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9209                               internal_val, NULL);
9210             }
9211         }
9212         if (internal_val) {
9213             xmlFree(internal_val);
9214         }
9215     }
9216 
9217     *value = val;
9218     return (name);
9219 }
9220 /**
9221  * xmlParseStartTag2:
9222  * @ctxt:  an XML parser context
9223  *
9224  * parse a start of tag either for rule element or
9225  * EmptyElement. In both case we don't parse the tag closing chars.
9226  * This routine is called when running SAX2 parsing
9227  *
9228  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9229  *
9230  * [ WFC: Unique Att Spec ]
9231  * No attribute name may appear more than once in the same start-tag or
9232  * empty-element tag.
9233  *
9234  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9235  *
9236  * [ WFC: Unique Att Spec ]
9237  * No attribute name may appear more than once in the same start-tag or
9238  * empty-element tag.
9239  *
9240  * With namespace:
9241  *
9242  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9243  *
9244  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9245  *
9246  * Returns the element name parsed
9247  */
9248 
9249 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9250 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9251                   const xmlChar **URI, int *tlen) {
9252     const xmlChar *localname;
9253     const xmlChar *prefix;
9254     const xmlChar *attname;
9255     const xmlChar *aprefix;
9256     const xmlChar *nsname;
9257     xmlChar *attvalue;
9258     const xmlChar **atts = ctxt->atts;
9259     int maxatts = ctxt->maxatts;
9260     int nratts, nbatts, nbdef, inputid;
9261     int i, j, nbNs, attval;
9262     unsigned long cur;
9263     int nsNr = ctxt->nsNr;
9264 
9265     if (RAW != '<') return(NULL);
9266     NEXT1;
9267 
9268     /*
9269      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9270      *       point since the attribute values may be stored as pointers to
9271      *       the buffer and calling SHRINK would destroy them !
9272      *       The Shrinking is only possible once the full set of attribute
9273      *       callbacks have been done.
9274      */
9275     SHRINK;
9276     cur = ctxt->input->cur - ctxt->input->base;
9277     inputid = ctxt->input->id;
9278     nbatts = 0;
9279     nratts = 0;
9280     nbdef = 0;
9281     nbNs = 0;
9282     attval = 0;
9283     /* Forget any namespaces added during an earlier parse of this element. */
9284     ctxt->nsNr = nsNr;
9285 
9286     localname = xmlParseQName(ctxt, &prefix);
9287     if (localname == NULL) {
9288 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9289 		       "StartTag: invalid element name\n");
9290         return(NULL);
9291     }
9292     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9293 
9294     /*
9295      * Now parse the attributes, it ends up with the ending
9296      *
9297      * (S Attribute)* S?
9298      */
9299     SKIP_BLANKS;
9300     GROW;
9301 
9302     while (((RAW != '>') &&
9303 	   ((RAW != '/') || (NXT(1) != '>')) &&
9304 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9305 	const xmlChar *q = CUR_PTR;
9306 	unsigned int cons = ctxt->input->consumed;
9307 	int len = -1, alloc = 0;
9308 
9309 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9310 	                             &aprefix, &attvalue, &len, &alloc);
9311         if ((attname == NULL) || (attvalue == NULL))
9312             goto next_attr;
9313 	if (len < 0) len = xmlStrlen(attvalue);
9314 
9315         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9316             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9317             xmlURIPtr uri;
9318 
9319             if (URL == NULL) {
9320                 xmlErrMemory(ctxt, "dictionary allocation failure");
9321                 if ((attvalue != NULL) && (alloc != 0))
9322                     xmlFree(attvalue);
9323                 localname = NULL;
9324                 goto done;
9325             }
9326             if (*URL != 0) {
9327                 uri = xmlParseURI((const char *) URL);
9328                 if (uri == NULL) {
9329                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9330                              "xmlns: '%s' is not a valid URI\n",
9331                                        URL, NULL, NULL);
9332                 } else {
9333                     if (uri->scheme == NULL) {
9334                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9335                                   "xmlns: URI %s is not absolute\n",
9336                                   URL, NULL, NULL);
9337                     }
9338                     xmlFreeURI(uri);
9339                 }
9340                 if (URL == ctxt->str_xml_ns) {
9341                     if (attname != ctxt->str_xml) {
9342                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9343                      "xml namespace URI cannot be the default namespace\n",
9344                                  NULL, NULL, NULL);
9345                     }
9346                     goto next_attr;
9347                 }
9348                 if ((len == 29) &&
9349                     (xmlStrEqual(URL,
9350                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9351                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352                          "reuse of the xmlns namespace name is forbidden\n",
9353                              NULL, NULL, NULL);
9354                     goto next_attr;
9355                 }
9356             }
9357             /*
9358              * check that it's not a defined namespace
9359              */
9360             for (j = 1;j <= nbNs;j++)
9361                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9362                     break;
9363             if (j <= nbNs)
9364                 xmlErrAttributeDup(ctxt, NULL, attname);
9365             else
9366                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9367 
9368         } else if (aprefix == ctxt->str_xmlns) {
9369             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9370             xmlURIPtr uri;
9371 
9372             if (attname == ctxt->str_xml) {
9373                 if (URL != ctxt->str_xml_ns) {
9374                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9375                              "xml namespace prefix mapped to wrong URI\n",
9376                              NULL, NULL, NULL);
9377                 }
9378                 /*
9379                  * Do not keep a namespace definition node
9380                  */
9381                 goto next_attr;
9382             }
9383             if (URL == ctxt->str_xml_ns) {
9384                 if (attname != ctxt->str_xml) {
9385                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9386                              "xml namespace URI mapped to wrong prefix\n",
9387                              NULL, NULL, NULL);
9388                 }
9389                 goto next_attr;
9390             }
9391             if (attname == ctxt->str_xmlns) {
9392                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393                          "redefinition of the xmlns prefix is forbidden\n",
9394                          NULL, NULL, NULL);
9395                 goto next_attr;
9396             }
9397             if ((len == 29) &&
9398                 (xmlStrEqual(URL,
9399                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9400                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9401                          "reuse of the xmlns namespace name is forbidden\n",
9402                          NULL, NULL, NULL);
9403                 goto next_attr;
9404             }
9405             if ((URL == NULL) || (URL[0] == 0)) {
9406                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407                          "xmlns:%s: Empty XML namespace is not allowed\n",
9408                               attname, NULL, NULL);
9409                 goto next_attr;
9410             } else {
9411                 uri = xmlParseURI((const char *) URL);
9412                 if (uri == NULL) {
9413                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9414                          "xmlns:%s: '%s' is not a valid URI\n",
9415                                        attname, URL, NULL);
9416                 } else {
9417                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9418                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9419                                   "xmlns:%s: URI %s is not absolute\n",
9420                                   attname, URL, NULL);
9421                     }
9422                     xmlFreeURI(uri);
9423                 }
9424             }
9425 
9426             /*
9427              * check that it's not a defined namespace
9428              */
9429             for (j = 1;j <= nbNs;j++)
9430                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9431                     break;
9432             if (j <= nbNs)
9433                 xmlErrAttributeDup(ctxt, aprefix, attname);
9434             else
9435                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9436 
9437         } else {
9438             /*
9439              * Add the pair to atts
9440              */
9441             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9442                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9443                     goto next_attr;
9444                 }
9445                 maxatts = ctxt->maxatts;
9446                 atts = ctxt->atts;
9447             }
9448             ctxt->attallocs[nratts++] = alloc;
9449             atts[nbatts++] = attname;
9450             atts[nbatts++] = aprefix;
9451             /*
9452              * The namespace URI field is used temporarily to point at the
9453              * base of the current input buffer for non-alloced attributes.
9454              * When the input buffer is reallocated, all the pointers become
9455              * invalid, but they can be reconstructed later.
9456              */
9457             if (alloc)
9458                 atts[nbatts++] = NULL;
9459             else
9460                 atts[nbatts++] = ctxt->input->base;
9461             atts[nbatts++] = attvalue;
9462             attvalue += len;
9463             atts[nbatts++] = attvalue;
9464             /*
9465              * tag if some deallocation is needed
9466              */
9467             if (alloc != 0) attval = 1;
9468             attvalue = NULL; /* moved into atts */
9469         }
9470 
9471 next_attr:
9472         if ((attvalue != NULL) && (alloc != 0)) {
9473             xmlFree(attvalue);
9474             attvalue = NULL;
9475         }
9476 
9477 	GROW
9478         if (ctxt->instate == XML_PARSER_EOF)
9479             break;
9480 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9481 	    break;
9482 	if (SKIP_BLANKS == 0) {
9483 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9484 			   "attributes construct error\n");
9485 	    break;
9486 	}
9487         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9488             (attname == NULL) && (attvalue == NULL)) {
9489 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9490 	         "xmlParseStartTag: problem parsing attributes\n");
9491 	    break;
9492 	}
9493         GROW;
9494     }
9495 
9496     if (ctxt->input->id != inputid) {
9497         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9498                     "Unexpected change of input\n");
9499         localname = NULL;
9500         goto done;
9501     }
9502 
9503     /* Reconstruct attribute value pointers. */
9504     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9505         if (atts[i+2] != NULL) {
9506             /*
9507              * Arithmetic on dangling pointers is technically undefined
9508              * behavior, but well...
9509              */
9510             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9511             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9512             atts[i+3] += offset;  /* value */
9513             atts[i+4] += offset;  /* valuend */
9514         }
9515     }
9516 
9517     /*
9518      * The attributes defaulting
9519      */
9520     if (ctxt->attsDefault != NULL) {
9521         xmlDefAttrsPtr defaults;
9522 
9523 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9524 	if (defaults != NULL) {
9525 	    for (i = 0;i < defaults->nbAttrs;i++) {
9526 	        attname = defaults->values[5 * i];
9527 		aprefix = defaults->values[5 * i + 1];
9528 
9529                 /*
9530 		 * special work for namespaces defaulted defs
9531 		 */
9532 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9533 		    /*
9534 		     * check that it's not a defined namespace
9535 		     */
9536 		    for (j = 1;j <= nbNs;j++)
9537 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9538 			    break;
9539 	            if (j <= nbNs) continue;
9540 
9541 		    nsname = xmlGetNamespace(ctxt, NULL);
9542 		    if (nsname != defaults->values[5 * i + 2]) {
9543 			if (nsPush(ctxt, NULL,
9544 			           defaults->values[5 * i + 2]) > 0)
9545 			    nbNs++;
9546 		    }
9547 		} else if (aprefix == ctxt->str_xmlns) {
9548 		    /*
9549 		     * check that it's not a defined namespace
9550 		     */
9551 		    for (j = 1;j <= nbNs;j++)
9552 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9553 			    break;
9554 	            if (j <= nbNs) continue;
9555 
9556 		    nsname = xmlGetNamespace(ctxt, attname);
9557 		    if (nsname != defaults->values[2]) {
9558 			if (nsPush(ctxt, attname,
9559 			           defaults->values[5 * i + 2]) > 0)
9560 			    nbNs++;
9561 		    }
9562 		} else {
9563 		    /*
9564 		     * check that it's not a defined attribute
9565 		     */
9566 		    for (j = 0;j < nbatts;j+=5) {
9567 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9568 			    break;
9569 		    }
9570 		    if (j < nbatts) continue;
9571 
9572 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9573 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9574                             localname = NULL;
9575                             goto done;
9576 			}
9577 			maxatts = ctxt->maxatts;
9578 			atts = ctxt->atts;
9579 		    }
9580 		    atts[nbatts++] = attname;
9581 		    atts[nbatts++] = aprefix;
9582 		    if (aprefix == NULL)
9583 			atts[nbatts++] = NULL;
9584 		    else
9585 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9586 		    atts[nbatts++] = defaults->values[5 * i + 2];
9587 		    atts[nbatts++] = defaults->values[5 * i + 3];
9588 		    if ((ctxt->standalone == 1) &&
9589 		        (defaults->values[5 * i + 4] != NULL)) {
9590 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9591 	  "standalone: attribute %s on %s defaulted from external subset\n",
9592 	                                 attname, localname);
9593 		    }
9594 		    nbdef++;
9595 		}
9596 	    }
9597 	}
9598     }
9599 
9600     /*
9601      * The attributes checkings
9602      */
9603     for (i = 0; i < nbatts;i += 5) {
9604         /*
9605 	* The default namespace does not apply to attribute names.
9606 	*/
9607 	if (atts[i + 1] != NULL) {
9608 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9609 	    if (nsname == NULL) {
9610 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9611 		    "Namespace prefix %s for %s on %s is not defined\n",
9612 		    atts[i + 1], atts[i], localname);
9613 	    }
9614 	    atts[i + 2] = nsname;
9615 	} else
9616 	    nsname = NULL;
9617 	/*
9618 	 * [ WFC: Unique Att Spec ]
9619 	 * No attribute name may appear more than once in the same
9620 	 * start-tag or empty-element tag.
9621 	 * As extended by the Namespace in XML REC.
9622 	 */
9623         for (j = 0; j < i;j += 5) {
9624 	    if (atts[i] == atts[j]) {
9625 	        if (atts[i+1] == atts[j+1]) {
9626 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9627 		    break;
9628 		}
9629 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9630 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9631 			     "Namespaced Attribute %s in '%s' redefined\n",
9632 			     atts[i], nsname, NULL);
9633 		    break;
9634 		}
9635 	    }
9636 	}
9637     }
9638 
9639     nsname = xmlGetNamespace(ctxt, prefix);
9640     if ((prefix != NULL) && (nsname == NULL)) {
9641 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9642 	         "Namespace prefix %s on %s is not defined\n",
9643 		 prefix, localname, NULL);
9644     }
9645     *pref = prefix;
9646     *URI = nsname;
9647 
9648     /*
9649      * SAX: Start of Element !
9650      */
9651     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9652 	(!ctxt->disableSAX)) {
9653 	if (nbNs > 0)
9654 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9655 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9656 			  nbatts / 5, nbdef, atts);
9657 	else
9658 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9659 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9660     }
9661 
9662 done:
9663     /*
9664      * Free up attribute allocated strings if needed
9665      */
9666     if (attval != 0) {
9667 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9668 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9669 	        xmlFree((xmlChar *) atts[i]);
9670     }
9671 
9672     return(localname);
9673 }
9674 
9675 /**
9676  * xmlParseEndTag2:
9677  * @ctxt:  an XML parser context
9678  * @line:  line of the start tag
9679  * @nsNr:  number of namespaces on the start tag
9680  *
9681  * parse an end of tag
9682  *
9683  * [42] ETag ::= '</' Name S? '>'
9684  *
9685  * With namespace
9686  *
9687  * [NS 9] ETag ::= '</' QName S? '>'
9688  */
9689 
9690 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9691 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9692     const xmlChar *name;
9693 
9694     GROW;
9695     if ((RAW != '<') || (NXT(1) != '/')) {
9696 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9697 	return;
9698     }
9699     SKIP(2);
9700 
9701     if (tag->prefix == NULL)
9702         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9703     else
9704         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9705 
9706     /*
9707      * We should definitely be at the ending "S? '>'" part
9708      */
9709     GROW;
9710     if (ctxt->instate == XML_PARSER_EOF)
9711         return;
9712     SKIP_BLANKS;
9713     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9714 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9715     } else
9716 	NEXT1;
9717 
9718     /*
9719      * [ WFC: Element Type Match ]
9720      * The Name in an element's end-tag must match the element type in the
9721      * start-tag.
9722      *
9723      */
9724     if (name != (xmlChar*)1) {
9725         if (name == NULL) name = BAD_CAST "unparsable";
9726         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9727 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9728 		                ctxt->name, tag->line, name);
9729     }
9730 
9731     /*
9732      * SAX: End of Tag
9733      */
9734     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9735 	(!ctxt->disableSAX))
9736 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9737                                 tag->URI);
9738 
9739     spacePop(ctxt);
9740     if (tag->nsNr != 0)
9741 	nsPop(ctxt, tag->nsNr);
9742 }
9743 
9744 /**
9745  * xmlParseCDSect:
9746  * @ctxt:  an XML parser context
9747  *
9748  * Parse escaped pure raw content.
9749  *
9750  * [18] CDSect ::= CDStart CData CDEnd
9751  *
9752  * [19] CDStart ::= '<![CDATA['
9753  *
9754  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9755  *
9756  * [21] CDEnd ::= ']]>'
9757  */
9758 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9759 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9760     xmlChar *buf = NULL;
9761     int len = 0;
9762     int size = XML_PARSER_BUFFER_SIZE;
9763     int r, rl;
9764     int	s, sl;
9765     int cur, l;
9766     int count = 0;
9767 
9768     /* Check 2.6.0 was NXT(0) not RAW */
9769     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9770 	SKIP(9);
9771     } else
9772         return;
9773 
9774     ctxt->instate = XML_PARSER_CDATA_SECTION;
9775     r = CUR_CHAR(rl);
9776     if (!IS_CHAR(r)) {
9777 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9778 	ctxt->instate = XML_PARSER_CONTENT;
9779         return;
9780     }
9781     NEXTL(rl);
9782     s = CUR_CHAR(sl);
9783     if (!IS_CHAR(s)) {
9784 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9785 	ctxt->instate = XML_PARSER_CONTENT;
9786         return;
9787     }
9788     NEXTL(sl);
9789     cur = CUR_CHAR(l);
9790     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9791     if (buf == NULL) {
9792 	xmlErrMemory(ctxt, NULL);
9793 	return;
9794     }
9795     while (IS_CHAR(cur) &&
9796            ((r != ']') || (s != ']') || (cur != '>'))) {
9797 	if (len + 5 >= size) {
9798 	    xmlChar *tmp;
9799 
9800             if ((size > XML_MAX_TEXT_LENGTH) &&
9801                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9802                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9803                              "CData section too big found", NULL);
9804                 xmlFree (buf);
9805                 return;
9806             }
9807 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9808 	    if (tmp == NULL) {
9809 	        xmlFree(buf);
9810 		xmlErrMemory(ctxt, NULL);
9811 		return;
9812 	    }
9813 	    buf = tmp;
9814 	    size *= 2;
9815 	}
9816 	COPY_BUF(rl,buf,len,r);
9817 	r = s;
9818 	rl = sl;
9819 	s = cur;
9820 	sl = l;
9821 	count++;
9822 	if (count > 50) {
9823 	    SHRINK;
9824 	    GROW;
9825             if (ctxt->instate == XML_PARSER_EOF) {
9826 		xmlFree(buf);
9827 		return;
9828             }
9829 	    count = 0;
9830 	}
9831 	NEXTL(l);
9832 	cur = CUR_CHAR(l);
9833     }
9834     buf[len] = 0;
9835     ctxt->instate = XML_PARSER_CONTENT;
9836     if (cur != '>') {
9837 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9838 	                     "CData section not finished\n%.50s\n", buf);
9839 	xmlFree(buf);
9840         return;
9841     }
9842     NEXTL(l);
9843 
9844     /*
9845      * OK the buffer is to be consumed as cdata.
9846      */
9847     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9848 	if (ctxt->sax->cdataBlock != NULL)
9849 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9850 	else if (ctxt->sax->characters != NULL)
9851 	    ctxt->sax->characters(ctxt->userData, buf, len);
9852     }
9853     xmlFree(buf);
9854 }
9855 
9856 /**
9857  * xmlParseContentInternal:
9858  * @ctxt:  an XML parser context
9859  *
9860  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9861  * unexpected EOF to the caller.
9862  */
9863 
9864 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9865 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9866     int nameNr = ctxt->nameNr;
9867 
9868     GROW;
9869     while ((RAW != 0) &&
9870 	   (ctxt->instate != XML_PARSER_EOF)) {
9871 	const xmlChar *test = CUR_PTR;
9872 	unsigned int cons = ctxt->input->consumed;
9873 	const xmlChar *cur = ctxt->input->cur;
9874 
9875 	/*
9876 	 * First case : a Processing Instruction.
9877 	 */
9878 	if ((*cur == '<') && (cur[1] == '?')) {
9879 	    xmlParsePI(ctxt);
9880 	}
9881 
9882 	/*
9883 	 * Second case : a CDSection
9884 	 */
9885 	/* 2.6.0 test was *cur not RAW */
9886 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9887 	    xmlParseCDSect(ctxt);
9888 	}
9889 
9890 	/*
9891 	 * Third case :  a comment
9892 	 */
9893 	else if ((*cur == '<') && (NXT(1) == '!') &&
9894 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9895 	    xmlParseComment(ctxt);
9896 	    ctxt->instate = XML_PARSER_CONTENT;
9897 	}
9898 
9899 	/*
9900 	 * Fourth case :  a sub-element.
9901 	 */
9902 	else if (*cur == '<') {
9903             if (NXT(1) == '/') {
9904                 if (ctxt->nameNr <= nameNr)
9905                     break;
9906 	        xmlParseElementEnd(ctxt);
9907             } else {
9908 	        xmlParseElementStart(ctxt);
9909             }
9910 	}
9911 
9912 	/*
9913 	 * Fifth case : a reference. If if has not been resolved,
9914 	 *    parsing returns it's Name, create the node
9915 	 */
9916 
9917 	else if (*cur == '&') {
9918 	    xmlParseReference(ctxt);
9919 	}
9920 
9921 	/*
9922 	 * Last case, text. Note that References are handled directly.
9923 	 */
9924 	else {
9925 	    xmlParseCharData(ctxt, 0);
9926 	}
9927 
9928 	GROW;
9929 	SHRINK;
9930 
9931 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9932 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9933 	                "detected an error in element content\n");
9934 	    xmlHaltParser(ctxt);
9935             break;
9936 	}
9937     }
9938 }
9939 
9940 /**
9941  * xmlParseContent:
9942  * @ctxt:  an XML parser context
9943  *
9944  * Parse a content sequence. Stops at EOF or '</'.
9945  *
9946  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9947  */
9948 
9949 void
xmlParseContent(xmlParserCtxtPtr ctxt)9950 xmlParseContent(xmlParserCtxtPtr ctxt) {
9951     int nameNr = ctxt->nameNr;
9952 
9953     xmlParseContentInternal(ctxt);
9954 
9955     if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9956         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9957         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9958         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9959                 "Premature end of data in tag %s line %d\n",
9960 		name, line, NULL);
9961     }
9962 }
9963 
9964 /**
9965  * xmlParseElement:
9966  * @ctxt:  an XML parser context
9967  *
9968  * parse an XML element
9969  *
9970  * [39] element ::= EmptyElemTag | STag content ETag
9971  *
9972  * [ WFC: Element Type Match ]
9973  * The Name in an element's end-tag must match the element type in the
9974  * start-tag.
9975  *
9976  */
9977 
9978 void
xmlParseElement(xmlParserCtxtPtr ctxt)9979 xmlParseElement(xmlParserCtxtPtr ctxt) {
9980     if (xmlParseElementStart(ctxt) != 0)
9981         return;
9982 
9983     xmlParseContentInternal(ctxt);
9984     if (ctxt->instate == XML_PARSER_EOF)
9985 	return;
9986 
9987     if (CUR == 0) {
9988         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9989         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9990         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9991                 "Premature end of data in tag %s line %d\n",
9992 		name, line, NULL);
9993         return;
9994     }
9995 
9996     xmlParseElementEnd(ctxt);
9997 }
9998 
9999 /**
10000  * xmlParseElementStart:
10001  * @ctxt:  an XML parser context
10002  *
10003  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10004  * opening tag was parsed, 1 if an empty element was parsed.
10005  */
10006 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10007 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10008     const xmlChar *name;
10009     const xmlChar *prefix = NULL;
10010     const xmlChar *URI = NULL;
10011     xmlParserNodeInfo node_info;
10012     int line, tlen = 0;
10013     xmlNodePtr ret;
10014     int nsNr = ctxt->nsNr;
10015 
10016     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10017         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10018 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10019 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10020 			  xmlParserMaxDepth);
10021 	xmlHaltParser(ctxt);
10022 	return(-1);
10023     }
10024 
10025     /* Capture start position */
10026     if (ctxt->record_info) {
10027         node_info.begin_pos = ctxt->input->consumed +
10028                           (CUR_PTR - ctxt->input->base);
10029 	node_info.begin_line = ctxt->input->line;
10030     }
10031 
10032     if (ctxt->spaceNr == 0)
10033 	spacePush(ctxt, -1);
10034     else if (*ctxt->space == -2)
10035 	spacePush(ctxt, -1);
10036     else
10037 	spacePush(ctxt, *ctxt->space);
10038 
10039     line = ctxt->input->line;
10040 #ifdef LIBXML_SAX1_ENABLED
10041     if (ctxt->sax2)
10042 #endif /* LIBXML_SAX1_ENABLED */
10043         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10044 #ifdef LIBXML_SAX1_ENABLED
10045     else
10046 	name = xmlParseStartTag(ctxt);
10047 #endif /* LIBXML_SAX1_ENABLED */
10048     if (ctxt->instate == XML_PARSER_EOF)
10049 	return(-1);
10050     if (name == NULL) {
10051 	spacePop(ctxt);
10052         return(-1);
10053     }
10054     nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10055     ret = ctxt->node;
10056 
10057 #ifdef LIBXML_VALID_ENABLED
10058     /*
10059      * [ VC: Root Element Type ]
10060      * The Name in the document type declaration must match the element
10061      * type of the root element.
10062      */
10063     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10064         ctxt->node && (ctxt->node == ctxt->myDoc->children))
10065         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10066 #endif /* LIBXML_VALID_ENABLED */
10067 
10068     /*
10069      * Check for an Empty Element.
10070      */
10071     if ((RAW == '/') && (NXT(1) == '>')) {
10072         SKIP(2);
10073 	if (ctxt->sax2) {
10074 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10075 		(!ctxt->disableSAX))
10076 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10077 #ifdef LIBXML_SAX1_ENABLED
10078 	} else {
10079 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10080 		(!ctxt->disableSAX))
10081 		ctxt->sax->endElement(ctxt->userData, name);
10082 #endif /* LIBXML_SAX1_ENABLED */
10083 	}
10084 	namePop(ctxt);
10085 	spacePop(ctxt);
10086 	if (nsNr != ctxt->nsNr)
10087 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10088 	if ( ret != NULL && ctxt->record_info ) {
10089 	   node_info.end_pos = ctxt->input->consumed +
10090 			      (CUR_PTR - ctxt->input->base);
10091 	   node_info.end_line = ctxt->input->line;
10092 	   node_info.node = ret;
10093 	   xmlParserAddNodeInfo(ctxt, &node_info);
10094 	}
10095 	return(1);
10096     }
10097     if (RAW == '>') {
10098         NEXT1;
10099     } else {
10100         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10101 		     "Couldn't find end of Start Tag %s line %d\n",
10102 		                name, line, NULL);
10103 
10104 	/*
10105 	 * end of parsing of this node.
10106 	 */
10107 	nodePop(ctxt);
10108 	namePop(ctxt);
10109 	spacePop(ctxt);
10110 	if (nsNr != ctxt->nsNr)
10111 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10112 
10113 	/*
10114 	 * Capture end position and add node
10115 	 */
10116 	if ( ret != NULL && ctxt->record_info ) {
10117 	   node_info.end_pos = ctxt->input->consumed +
10118 			      (CUR_PTR - ctxt->input->base);
10119 	   node_info.end_line = ctxt->input->line;
10120 	   node_info.node = ret;
10121 	   xmlParserAddNodeInfo(ctxt, &node_info);
10122 	}
10123 	return(-1);
10124     }
10125 
10126     return(0);
10127 }
10128 
10129 /**
10130  * xmlParseElementEnd:
10131  * @ctxt:  an XML parser context
10132  *
10133  * Parse the end of an XML element.
10134  */
10135 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10136 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10137     xmlParserNodeInfo node_info;
10138     xmlNodePtr ret = ctxt->node;
10139 
10140     if (ctxt->nameNr <= 0)
10141         return;
10142 
10143     /*
10144      * parse the end of tag: '</' should be here.
10145      */
10146     if (ctxt->sax2) {
10147 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10148 	namePop(ctxt);
10149     }
10150 #ifdef LIBXML_SAX1_ENABLED
10151     else
10152 	xmlParseEndTag1(ctxt, 0);
10153 #endif /* LIBXML_SAX1_ENABLED */
10154 
10155     /*
10156      * Capture end position and add node
10157      */
10158     if ( ret != NULL && ctxt->record_info ) {
10159        node_info.end_pos = ctxt->input->consumed +
10160                           (CUR_PTR - ctxt->input->base);
10161        node_info.end_line = ctxt->input->line;
10162        node_info.node = ret;
10163        xmlParserAddNodeInfo(ctxt, &node_info);
10164     }
10165 }
10166 
10167 /**
10168  * xmlParseVersionNum:
10169  * @ctxt:  an XML parser context
10170  *
10171  * parse the XML version value.
10172  *
10173  * [26] VersionNum ::= '1.' [0-9]+
10174  *
10175  * In practice allow [0-9].[0-9]+ at that level
10176  *
10177  * Returns the string giving the XML version number, or NULL
10178  */
10179 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10180 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10181     xmlChar *buf = NULL;
10182     int len = 0;
10183     int size = 10;
10184     xmlChar cur;
10185 
10186     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10187     if (buf == NULL) {
10188 	xmlErrMemory(ctxt, NULL);
10189 	return(NULL);
10190     }
10191     cur = CUR;
10192     if (!((cur >= '0') && (cur <= '9'))) {
10193 	xmlFree(buf);
10194 	return(NULL);
10195     }
10196     buf[len++] = cur;
10197     NEXT;
10198     cur=CUR;
10199     if (cur != '.') {
10200 	xmlFree(buf);
10201 	return(NULL);
10202     }
10203     buf[len++] = cur;
10204     NEXT;
10205     cur=CUR;
10206     while ((cur >= '0') && (cur <= '9')) {
10207 	if (len + 1 >= size) {
10208 	    xmlChar *tmp;
10209 
10210 	    size *= 2;
10211 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10212 	    if (tmp == NULL) {
10213 	        xmlFree(buf);
10214 		xmlErrMemory(ctxt, NULL);
10215 		return(NULL);
10216 	    }
10217 	    buf = tmp;
10218 	}
10219 	buf[len++] = cur;
10220 	NEXT;
10221 	cur=CUR;
10222     }
10223     buf[len] = 0;
10224     return(buf);
10225 }
10226 
10227 /**
10228  * xmlParseVersionInfo:
10229  * @ctxt:  an XML parser context
10230  *
10231  * parse the XML version.
10232  *
10233  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10234  *
10235  * [25] Eq ::= S? '=' S?
10236  *
10237  * Returns the version string, e.g. "1.0"
10238  */
10239 
10240 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10241 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10242     xmlChar *version = NULL;
10243 
10244     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10245 	SKIP(7);
10246 	SKIP_BLANKS;
10247 	if (RAW != '=') {
10248 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 	    return(NULL);
10250         }
10251 	NEXT;
10252 	SKIP_BLANKS;
10253 	if (RAW == '"') {
10254 	    NEXT;
10255 	    version = xmlParseVersionNum(ctxt);
10256 	    if (RAW != '"') {
10257 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 	    } else
10259 	        NEXT;
10260 	} else if (RAW == '\''){
10261 	    NEXT;
10262 	    version = xmlParseVersionNum(ctxt);
10263 	    if (RAW != '\'') {
10264 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10265 	    } else
10266 	        NEXT;
10267 	} else {
10268 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10269 	}
10270     }
10271     return(version);
10272 }
10273 
10274 /**
10275  * xmlParseEncName:
10276  * @ctxt:  an XML parser context
10277  *
10278  * parse the XML encoding name
10279  *
10280  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10281  *
10282  * Returns the encoding name value or NULL
10283  */
10284 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10285 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10286     xmlChar *buf = NULL;
10287     int len = 0;
10288     int size = 10;
10289     xmlChar cur;
10290 
10291     cur = CUR;
10292     if (((cur >= 'a') && (cur <= 'z')) ||
10293         ((cur >= 'A') && (cur <= 'Z'))) {
10294 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10295 	if (buf == NULL) {
10296 	    xmlErrMemory(ctxt, NULL);
10297 	    return(NULL);
10298 	}
10299 
10300 	buf[len++] = cur;
10301 	NEXT;
10302 	cur = CUR;
10303 	while (((cur >= 'a') && (cur <= 'z')) ||
10304 	       ((cur >= 'A') && (cur <= 'Z')) ||
10305 	       ((cur >= '0') && (cur <= '9')) ||
10306 	       (cur == '.') || (cur == '_') ||
10307 	       (cur == '-')) {
10308 	    if (len + 1 >= size) {
10309 	        xmlChar *tmp;
10310 
10311 		size *= 2;
10312 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10313 		if (tmp == NULL) {
10314 		    xmlErrMemory(ctxt, NULL);
10315 		    xmlFree(buf);
10316 		    return(NULL);
10317 		}
10318 		buf = tmp;
10319 	    }
10320 	    buf[len++] = cur;
10321 	    NEXT;
10322 	    cur = CUR;
10323 	    if (cur == 0) {
10324 	        SHRINK;
10325 		GROW;
10326 		cur = CUR;
10327 	    }
10328         }
10329 	buf[len] = 0;
10330     } else {
10331 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10332     }
10333     return(buf);
10334 }
10335 
10336 /**
10337  * xmlParseEncodingDecl:
10338  * @ctxt:  an XML parser context
10339  *
10340  * parse the XML encoding declaration
10341  *
10342  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10343  *
10344  * this setups the conversion filters.
10345  *
10346  * Returns the encoding value or NULL
10347  */
10348 
10349 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10350 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10351     xmlChar *encoding = NULL;
10352 
10353     SKIP_BLANKS;
10354     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10355 	SKIP(8);
10356 	SKIP_BLANKS;
10357 	if (RAW != '=') {
10358 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10359 	    return(NULL);
10360         }
10361 	NEXT;
10362 	SKIP_BLANKS;
10363 	if (RAW == '"') {
10364 	    NEXT;
10365 	    encoding = xmlParseEncName(ctxt);
10366 	    if (RAW != '"') {
10367 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10368 		xmlFree((xmlChar *) encoding);
10369 		return(NULL);
10370 	    } else
10371 	        NEXT;
10372 	} else if (RAW == '\''){
10373 	    NEXT;
10374 	    encoding = xmlParseEncName(ctxt);
10375 	    if (RAW != '\'') {
10376 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10377 		xmlFree((xmlChar *) encoding);
10378 		return(NULL);
10379 	    } else
10380 	        NEXT;
10381 	} else {
10382 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10383 	}
10384 
10385         /*
10386          * Non standard parsing, allowing the user to ignore encoding
10387          */
10388         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10389 	    xmlFree((xmlChar *) encoding);
10390             return(NULL);
10391 	}
10392 
10393 	/*
10394 	 * UTF-16 encoding switch has already taken place at this stage,
10395 	 * more over the little-endian/big-endian selection is already done
10396 	 */
10397         if ((encoding != NULL) &&
10398 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10399 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10400 	    /*
10401 	     * If no encoding was passed to the parser, that we are
10402 	     * using UTF-16 and no decoder is present i.e. the
10403 	     * document is apparently UTF-8 compatible, then raise an
10404 	     * encoding mismatch fatal error
10405 	     */
10406 	    if ((ctxt->encoding == NULL) &&
10407 	        (ctxt->input->buf != NULL) &&
10408 	        (ctxt->input->buf->encoder == NULL)) {
10409 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10410 		  "Document labelled UTF-16 but has UTF-8 content\n");
10411 	    }
10412 	    if (ctxt->encoding != NULL)
10413 		xmlFree((xmlChar *) ctxt->encoding);
10414 	    ctxt->encoding = encoding;
10415 	}
10416 	/*
10417 	 * UTF-8 encoding is handled natively
10418 	 */
10419         else if ((encoding != NULL) &&
10420 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10421 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10422 	    if (ctxt->encoding != NULL)
10423 		xmlFree((xmlChar *) ctxt->encoding);
10424 	    ctxt->encoding = encoding;
10425 	}
10426 	else if (encoding != NULL) {
10427 	    xmlCharEncodingHandlerPtr handler;
10428 
10429 	    if (ctxt->input->encoding != NULL)
10430 		xmlFree((xmlChar *) ctxt->input->encoding);
10431 	    ctxt->input->encoding = encoding;
10432 
10433             handler = xmlFindCharEncodingHandler((const char *) encoding);
10434 	    if (handler != NULL) {
10435 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10436 		    /* failed to convert */
10437 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10438 		    return(NULL);
10439 		}
10440 	    } else {
10441 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10442 			"Unsupported encoding %s\n", encoding);
10443 		return(NULL);
10444 	    }
10445 	}
10446     }
10447     return(encoding);
10448 }
10449 
10450 /**
10451  * xmlParseSDDecl:
10452  * @ctxt:  an XML parser context
10453  *
10454  * parse the XML standalone declaration
10455  *
10456  * [32] SDDecl ::= S 'standalone' Eq
10457  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10458  *
10459  * [ VC: Standalone Document Declaration ]
10460  * TODO The standalone document declaration must have the value "no"
10461  * if any external markup declarations contain declarations of:
10462  *  - attributes with default values, if elements to which these
10463  *    attributes apply appear in the document without specifications
10464  *    of values for these attributes, or
10465  *  - entities (other than amp, lt, gt, apos, quot), if references
10466  *    to those entities appear in the document, or
10467  *  - attributes with values subject to normalization, where the
10468  *    attribute appears in the document with a value which will change
10469  *    as a result of normalization, or
10470  *  - element types with element content, if white space occurs directly
10471  *    within any instance of those types.
10472  *
10473  * Returns:
10474  *   1 if standalone="yes"
10475  *   0 if standalone="no"
10476  *  -2 if standalone attribute is missing or invalid
10477  *	  (A standalone value of -2 means that the XML declaration was found,
10478  *	   but no value was specified for the standalone attribute).
10479  */
10480 
10481 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10482 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10483     int standalone = -2;
10484 
10485     SKIP_BLANKS;
10486     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10487 	SKIP(10);
10488         SKIP_BLANKS;
10489 	if (RAW != '=') {
10490 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10491 	    return(standalone);
10492         }
10493 	NEXT;
10494 	SKIP_BLANKS;
10495         if (RAW == '\''){
10496 	    NEXT;
10497 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10498 	        standalone = 0;
10499                 SKIP(2);
10500 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10501 	               (NXT(2) == 's')) {
10502 	        standalone = 1;
10503 		SKIP(3);
10504             } else {
10505 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10506 	    }
10507 	    if (RAW != '\'') {
10508 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10509 	    } else
10510 	        NEXT;
10511 	} else if (RAW == '"'){
10512 	    NEXT;
10513 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10514 	        standalone = 0;
10515 		SKIP(2);
10516 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10517 	               (NXT(2) == 's')) {
10518 	        standalone = 1;
10519                 SKIP(3);
10520             } else {
10521 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10522 	    }
10523 	    if (RAW != '"') {
10524 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10525 	    } else
10526 	        NEXT;
10527 	} else {
10528 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10529         }
10530     }
10531     return(standalone);
10532 }
10533 
10534 /**
10535  * xmlParseXMLDecl:
10536  * @ctxt:  an XML parser context
10537  *
10538  * parse an XML declaration header
10539  *
10540  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10541  */
10542 
10543 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10544 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10545     xmlChar *version;
10546 
10547     /*
10548      * This value for standalone indicates that the document has an
10549      * XML declaration but it does not have a standalone attribute.
10550      * It will be overwritten later if a standalone attribute is found.
10551      */
10552     ctxt->input->standalone = -2;
10553 
10554     /*
10555      * We know that '<?xml' is here.
10556      */
10557     SKIP(5);
10558 
10559     if (!IS_BLANK_CH(RAW)) {
10560 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10561 	               "Blank needed after '<?xml'\n");
10562     }
10563     SKIP_BLANKS;
10564 
10565     /*
10566      * We must have the VersionInfo here.
10567      */
10568     version = xmlParseVersionInfo(ctxt);
10569     if (version == NULL) {
10570 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10571     } else {
10572 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10573 	    /*
10574 	     * Changed here for XML-1.0 5th edition
10575 	     */
10576 	    if (ctxt->options & XML_PARSE_OLD10) {
10577 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10578 			          "Unsupported version '%s'\n",
10579 			          version);
10580 	    } else {
10581 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10582 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10583 		                  "Unsupported version '%s'\n",
10584 				  version, NULL);
10585 		} else {
10586 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10587 				      "Unsupported version '%s'\n",
10588 				      version);
10589 		}
10590 	    }
10591 	}
10592 	if (ctxt->version != NULL)
10593 	    xmlFree((void *) ctxt->version);
10594 	ctxt->version = version;
10595     }
10596 
10597     /*
10598      * We may have the encoding declaration
10599      */
10600     if (!IS_BLANK_CH(RAW)) {
10601         if ((RAW == '?') && (NXT(1) == '>')) {
10602 	    SKIP(2);
10603 	    return;
10604 	}
10605 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10606     }
10607     xmlParseEncodingDecl(ctxt);
10608     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10609          (ctxt->instate == XML_PARSER_EOF)) {
10610 	/*
10611 	 * The XML REC instructs us to stop parsing right here
10612 	 */
10613         return;
10614     }
10615 
10616     /*
10617      * We may have the standalone status.
10618      */
10619     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10620         if ((RAW == '?') && (NXT(1) == '>')) {
10621 	    SKIP(2);
10622 	    return;
10623 	}
10624 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10625     }
10626 
10627     /*
10628      * We can grow the input buffer freely at that point
10629      */
10630     GROW;
10631 
10632     SKIP_BLANKS;
10633     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10634 
10635     SKIP_BLANKS;
10636     if ((RAW == '?') && (NXT(1) == '>')) {
10637         SKIP(2);
10638     } else if (RAW == '>') {
10639         /* Deprecated old WD ... */
10640 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10641 	NEXT;
10642     } else {
10643 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10644 	MOVETO_ENDTAG(CUR_PTR);
10645 	NEXT;
10646     }
10647 }
10648 
10649 /**
10650  * xmlParseMisc:
10651  * @ctxt:  an XML parser context
10652  *
10653  * parse an XML Misc* optional field.
10654  *
10655  * [27] Misc ::= Comment | PI |  S
10656  */
10657 
10658 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10659 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10660     while ((ctxt->instate != XML_PARSER_EOF) &&
10661            (((RAW == '<') && (NXT(1) == '?')) ||
10662             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10663             IS_BLANK_CH(CUR))) {
10664         if ((RAW == '<') && (NXT(1) == '?')) {
10665 	    xmlParsePI(ctxt);
10666 	} else if (IS_BLANK_CH(CUR)) {
10667 	    NEXT;
10668 	} else
10669 	    xmlParseComment(ctxt);
10670     }
10671 }
10672 
10673 /**
10674  * xmlParseDocument:
10675  * @ctxt:  an XML parser context
10676  *
10677  * parse an XML document (and build a tree if using the standard SAX
10678  * interface).
10679  *
10680  * [1] document ::= prolog element Misc*
10681  *
10682  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10683  *
10684  * Returns 0, -1 in case of error. the parser context is augmented
10685  *                as a result of the parsing.
10686  */
10687 
10688 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10689 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10690     xmlChar start[4];
10691     xmlCharEncoding enc;
10692 
10693     xmlInitParser();
10694 
10695     if ((ctxt == NULL) || (ctxt->input == NULL))
10696         return(-1);
10697 
10698     GROW;
10699 
10700     /*
10701      * SAX: detecting the level.
10702      */
10703     xmlDetectSAX2(ctxt);
10704 
10705     /*
10706      * SAX: beginning of the document processing.
10707      */
10708     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10709         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10710     if (ctxt->instate == XML_PARSER_EOF)
10711 	return(-1);
10712 
10713     if ((ctxt->encoding == NULL) &&
10714         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10715 	/*
10716 	 * Get the 4 first bytes and decode the charset
10717 	 * if enc != XML_CHAR_ENCODING_NONE
10718 	 * plug some encoding conversion routines.
10719 	 */
10720 	start[0] = RAW;
10721 	start[1] = NXT(1);
10722 	start[2] = NXT(2);
10723 	start[3] = NXT(3);
10724 	enc = xmlDetectCharEncoding(&start[0], 4);
10725 	if (enc != XML_CHAR_ENCODING_NONE) {
10726 	    xmlSwitchEncoding(ctxt, enc);
10727 	}
10728     }
10729 
10730 
10731     if (CUR == 0) {
10732 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10733 	return(-1);
10734     }
10735 
10736     /*
10737      * Check for the XMLDecl in the Prolog.
10738      * do not GROW here to avoid the detected encoder to decode more
10739      * than just the first line, unless the amount of data is really
10740      * too small to hold "<?xml version="1.0" encoding="foo"
10741      */
10742     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10743        GROW;
10744     }
10745     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10746 
10747 	/*
10748 	 * Note that we will switch encoding on the fly.
10749 	 */
10750 	xmlParseXMLDecl(ctxt);
10751 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10752 	    (ctxt->instate == XML_PARSER_EOF)) {
10753 	    /*
10754 	     * The XML REC instructs us to stop parsing right here
10755 	     */
10756 	    return(-1);
10757 	}
10758 	ctxt->standalone = ctxt->input->standalone;
10759 	SKIP_BLANKS;
10760     } else {
10761 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10762     }
10763     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10764         ctxt->sax->startDocument(ctxt->userData);
10765     if (ctxt->instate == XML_PARSER_EOF)
10766 	return(-1);
10767     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10768         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10769 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10770     }
10771 
10772     /*
10773      * The Misc part of the Prolog
10774      */
10775     GROW;
10776     xmlParseMisc(ctxt);
10777 
10778     /*
10779      * Then possibly doc type declaration(s) and more Misc
10780      * (doctypedecl Misc*)?
10781      */
10782     GROW;
10783     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10784 
10785 	ctxt->inSubset = 1;
10786 	xmlParseDocTypeDecl(ctxt);
10787 	if (RAW == '[') {
10788 	    ctxt->instate = XML_PARSER_DTD;
10789 	    xmlParseInternalSubset(ctxt);
10790 	    if (ctxt->instate == XML_PARSER_EOF)
10791 		return(-1);
10792 	}
10793 
10794 	/*
10795 	 * Create and update the external subset.
10796 	 */
10797 	ctxt->inSubset = 2;
10798 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10799 	    (!ctxt->disableSAX))
10800 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10801 	                              ctxt->extSubSystem, ctxt->extSubURI);
10802 	if (ctxt->instate == XML_PARSER_EOF)
10803 	    return(-1);
10804 	ctxt->inSubset = 0;
10805 
10806         xmlCleanSpecialAttr(ctxt);
10807 
10808 	ctxt->instate = XML_PARSER_PROLOG;
10809 	xmlParseMisc(ctxt);
10810     }
10811 
10812     /*
10813      * Time to start parsing the tree itself
10814      */
10815     GROW;
10816     if (RAW != '<') {
10817 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10818 		       "Start tag expected, '<' not found\n");
10819     } else {
10820 	ctxt->instate = XML_PARSER_CONTENT;
10821 	xmlParseElement(ctxt);
10822 	ctxt->instate = XML_PARSER_EPILOG;
10823 
10824 
10825 	/*
10826 	 * The Misc part at the end
10827 	 */
10828 	xmlParseMisc(ctxt);
10829 
10830 	if (RAW != 0) {
10831 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10832 	}
10833 	ctxt->instate = XML_PARSER_EOF;
10834     }
10835 
10836     /*
10837      * SAX: end of the document processing.
10838      */
10839     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10840         ctxt->sax->endDocument(ctxt->userData);
10841 
10842     /*
10843      * Remove locally kept entity definitions if the tree was not built
10844      */
10845     if ((ctxt->myDoc != NULL) &&
10846 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10847 	xmlFreeDoc(ctxt->myDoc);
10848 	ctxt->myDoc = NULL;
10849     }
10850 
10851     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10852         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10853 	if (ctxt->valid)
10854 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10855 	if (ctxt->nsWellFormed)
10856 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10857 	if (ctxt->options & XML_PARSE_OLD10)
10858 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10859     }
10860     if (! ctxt->wellFormed) {
10861 	ctxt->valid = 0;
10862 	return(-1);
10863     }
10864     return(0);
10865 }
10866 
10867 /**
10868  * xmlParseExtParsedEnt:
10869  * @ctxt:  an XML parser context
10870  *
10871  * parse a general parsed entity
10872  * An external general parsed entity is well-formed if it matches the
10873  * production labeled extParsedEnt.
10874  *
10875  * [78] extParsedEnt ::= TextDecl? content
10876  *
10877  * Returns 0, -1 in case of error. the parser context is augmented
10878  *                as a result of the parsing.
10879  */
10880 
10881 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10882 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10883     xmlChar start[4];
10884     xmlCharEncoding enc;
10885 
10886     if ((ctxt == NULL) || (ctxt->input == NULL))
10887         return(-1);
10888 
10889     xmlDefaultSAXHandlerInit();
10890 
10891     xmlDetectSAX2(ctxt);
10892 
10893     GROW;
10894 
10895     /*
10896      * SAX: beginning of the document processing.
10897      */
10898     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10899         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10900 
10901     /*
10902      * Get the 4 first bytes and decode the charset
10903      * if enc != XML_CHAR_ENCODING_NONE
10904      * plug some encoding conversion routines.
10905      */
10906     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10907 	start[0] = RAW;
10908 	start[1] = NXT(1);
10909 	start[2] = NXT(2);
10910 	start[3] = NXT(3);
10911 	enc = xmlDetectCharEncoding(start, 4);
10912 	if (enc != XML_CHAR_ENCODING_NONE) {
10913 	    xmlSwitchEncoding(ctxt, enc);
10914 	}
10915     }
10916 
10917 
10918     if (CUR == 0) {
10919 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10920     }
10921 
10922     /*
10923      * Check for the XMLDecl in the Prolog.
10924      */
10925     GROW;
10926     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10927 
10928 	/*
10929 	 * Note that we will switch encoding on the fly.
10930 	 */
10931 	xmlParseXMLDecl(ctxt);
10932 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10933 	    /*
10934 	     * The XML REC instructs us to stop parsing right here
10935 	     */
10936 	    return(-1);
10937 	}
10938 	SKIP_BLANKS;
10939     } else {
10940 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10941     }
10942     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10943         ctxt->sax->startDocument(ctxt->userData);
10944     if (ctxt->instate == XML_PARSER_EOF)
10945 	return(-1);
10946 
10947     /*
10948      * Doing validity checking on chunk doesn't make sense
10949      */
10950     ctxt->instate = XML_PARSER_CONTENT;
10951     ctxt->validate = 0;
10952     ctxt->loadsubset = 0;
10953     ctxt->depth = 0;
10954 
10955     xmlParseContent(ctxt);
10956     if (ctxt->instate == XML_PARSER_EOF)
10957 	return(-1);
10958 
10959     if ((RAW == '<') && (NXT(1) == '/')) {
10960 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10961     } else if (RAW != 0) {
10962 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10963     }
10964 
10965     /*
10966      * SAX: end of the document processing.
10967      */
10968     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10969         ctxt->sax->endDocument(ctxt->userData);
10970 
10971     if (! ctxt->wellFormed) return(-1);
10972     return(0);
10973 }
10974 
10975 #ifdef LIBXML_PUSH_ENABLED
10976 /************************************************************************
10977  *									*
10978  *		Progressive parsing interfaces				*
10979  *									*
10980  ************************************************************************/
10981 
10982 /**
10983  * xmlParseLookupSequence:
10984  * @ctxt:  an XML parser context
10985  * @first:  the first char to lookup
10986  * @next:  the next char to lookup or zero
10987  * @third:  the next char to lookup or zero
10988  *
10989  * Try to find if a sequence (first, next, third) or  just (first next) or
10990  * (first) is available in the input stream.
10991  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10992  * to avoid rescanning sequences of bytes, it DOES change the state of the
10993  * parser, do not use liberally.
10994  *
10995  * Returns the index to the current parsing point if the full sequence
10996  *      is available, -1 otherwise.
10997  */
10998 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10999 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11000                        xmlChar next, xmlChar third) {
11001     int base, len;
11002     xmlParserInputPtr in;
11003     const xmlChar *buf;
11004 
11005     in = ctxt->input;
11006     if (in == NULL) return(-1);
11007     base = in->cur - in->base;
11008     if (base < 0) return(-1);
11009     if (ctxt->checkIndex > base)
11010         base = ctxt->checkIndex;
11011     if (in->buf == NULL) {
11012 	buf = in->base;
11013 	len = in->length;
11014     } else {
11015 	buf = xmlBufContent(in->buf->buffer);
11016 	len = xmlBufUse(in->buf->buffer);
11017     }
11018     /* take into account the sequence length */
11019     if (third) len -= 2;
11020     else if (next) len --;
11021     for (;base < len;base++) {
11022         if (buf[base] == first) {
11023 	    if (third != 0) {
11024 		if ((buf[base + 1] != next) ||
11025 		    (buf[base + 2] != third)) continue;
11026 	    } else if (next != 0) {
11027 		if (buf[base + 1] != next) continue;
11028 	    }
11029 	    ctxt->checkIndex = 0;
11030 #ifdef DEBUG_PUSH
11031 	    if (next == 0)
11032 		xmlGenericError(xmlGenericErrorContext,
11033 			"PP: lookup '%c' found at %d\n",
11034 			first, base);
11035 	    else if (third == 0)
11036 		xmlGenericError(xmlGenericErrorContext,
11037 			"PP: lookup '%c%c' found at %d\n",
11038 			first, next, base);
11039 	    else
11040 		xmlGenericError(xmlGenericErrorContext,
11041 			"PP: lookup '%c%c%c' found at %d\n",
11042 			first, next, third, base);
11043 #endif
11044 	    return(base - (in->cur - in->base));
11045 	}
11046     }
11047     ctxt->checkIndex = base;
11048 #ifdef DEBUG_PUSH
11049     if (next == 0)
11050 	xmlGenericError(xmlGenericErrorContext,
11051 		"PP: lookup '%c' failed\n", first);
11052     else if (third == 0)
11053 	xmlGenericError(xmlGenericErrorContext,
11054 		"PP: lookup '%c%c' failed\n", first, next);
11055     else
11056 	xmlGenericError(xmlGenericErrorContext,
11057 		"PP: lookup '%c%c%c' failed\n", first, next, third);
11058 #endif
11059     return(-1);
11060 }
11061 
11062 /**
11063  * xmlParseGetLasts:
11064  * @ctxt:  an XML parser context
11065  * @lastlt:  pointer to store the last '<' from the input
11066  * @lastgt:  pointer to store the last '>' from the input
11067  *
11068  * Lookup the last < and > in the current chunk
11069  */
11070 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11071 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11072                  const xmlChar **lastgt) {
11073     const xmlChar *tmp;
11074 
11075     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11076 	xmlGenericError(xmlGenericErrorContext,
11077 		    "Internal error: xmlParseGetLasts\n");
11078 	return;
11079     }
11080     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11081         tmp = ctxt->input->end;
11082 	tmp--;
11083 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11084 	if (tmp < ctxt->input->base) {
11085 	    *lastlt = NULL;
11086 	    *lastgt = NULL;
11087 	} else {
11088 	    *lastlt = tmp;
11089 	    tmp++;
11090 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11091 	        if (*tmp == '\'') {
11092 		    tmp++;
11093 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11094 		    if (tmp < ctxt->input->end) tmp++;
11095 		} else if (*tmp == '"') {
11096 		    tmp++;
11097 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11098 		    if (tmp < ctxt->input->end) tmp++;
11099 		} else
11100 		    tmp++;
11101 	    }
11102 	    if (tmp < ctxt->input->end)
11103 	        *lastgt = tmp;
11104 	    else {
11105 	        tmp = *lastlt;
11106 		tmp--;
11107 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11108 		if (tmp >= ctxt->input->base)
11109 		    *lastgt = tmp;
11110 		else
11111 		    *lastgt = NULL;
11112 	    }
11113 	}
11114     } else {
11115         *lastlt = NULL;
11116 	*lastgt = NULL;
11117     }
11118 }
11119 /**
11120  * xmlCheckCdataPush:
11121  * @cur: pointer to the block of characters
11122  * @len: length of the block in bytes
11123  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11124  *
11125  * Check that the block of characters is okay as SCdata content [20]
11126  *
11127  * Returns the number of bytes to pass if okay, a negative index where an
11128  *         UTF-8 error occurred otherwise
11129  */
11130 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11131 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11132     int ix;
11133     unsigned char c;
11134     int codepoint;
11135 
11136     if ((utf == NULL) || (len <= 0))
11137         return(0);
11138 
11139     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11140         c = utf[ix];
11141         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11142 	    if (c >= 0x20)
11143 		ix++;
11144 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11145 	        ix++;
11146 	    else
11147 	        return(-ix);
11148 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11149 	    if (ix + 2 > len) return(complete ? -ix : ix);
11150 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11151 	        return(-ix);
11152 	    codepoint = (utf[ix] & 0x1f) << 6;
11153 	    codepoint |= utf[ix+1] & 0x3f;
11154 	    if (!xmlIsCharQ(codepoint))
11155 	        return(-ix);
11156 	    ix += 2;
11157 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11158 	    if (ix + 3 > len) return(complete ? -ix : ix);
11159 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11160 	        ((utf[ix+2] & 0xc0) != 0x80))
11161 		    return(-ix);
11162 	    codepoint = (utf[ix] & 0xf) << 12;
11163 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11164 	    codepoint |= utf[ix+2] & 0x3f;
11165 	    if (!xmlIsCharQ(codepoint))
11166 	        return(-ix);
11167 	    ix += 3;
11168 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11169 	    if (ix + 4 > len) return(complete ? -ix : ix);
11170 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11171 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11172 		((utf[ix+3] & 0xc0) != 0x80))
11173 		    return(-ix);
11174 	    codepoint = (utf[ix] & 0x7) << 18;
11175 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11176 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11177 	    codepoint |= utf[ix+3] & 0x3f;
11178 	    if (!xmlIsCharQ(codepoint))
11179 	        return(-ix);
11180 	    ix += 4;
11181 	} else				/* unknown encoding */
11182 	    return(-ix);
11183       }
11184       return(ix);
11185 }
11186 
11187 /**
11188  * xmlParseTryOrFinish:
11189  * @ctxt:  an XML parser context
11190  * @terminate:  last chunk indicator
11191  *
11192  * Try to progress on parsing
11193  *
11194  * Returns zero if no parsing was possible
11195  */
11196 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11197 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11198     int ret = 0;
11199     int avail, tlen;
11200     xmlChar cur, next;
11201     const xmlChar *lastlt, *lastgt;
11202 
11203     if (ctxt->input == NULL)
11204         return(0);
11205 
11206 #ifdef DEBUG_PUSH
11207     switch (ctxt->instate) {
11208 	case XML_PARSER_EOF:
11209 	    xmlGenericError(xmlGenericErrorContext,
11210 		    "PP: try EOF\n"); break;
11211 	case XML_PARSER_START:
11212 	    xmlGenericError(xmlGenericErrorContext,
11213 		    "PP: try START\n"); break;
11214 	case XML_PARSER_MISC:
11215 	    xmlGenericError(xmlGenericErrorContext,
11216 		    "PP: try MISC\n");break;
11217 	case XML_PARSER_COMMENT:
11218 	    xmlGenericError(xmlGenericErrorContext,
11219 		    "PP: try COMMENT\n");break;
11220 	case XML_PARSER_PROLOG:
11221 	    xmlGenericError(xmlGenericErrorContext,
11222 		    "PP: try PROLOG\n");break;
11223 	case XML_PARSER_START_TAG:
11224 	    xmlGenericError(xmlGenericErrorContext,
11225 		    "PP: try START_TAG\n");break;
11226 	case XML_PARSER_CONTENT:
11227 	    xmlGenericError(xmlGenericErrorContext,
11228 		    "PP: try CONTENT\n");break;
11229 	case XML_PARSER_CDATA_SECTION:
11230 	    xmlGenericError(xmlGenericErrorContext,
11231 		    "PP: try CDATA_SECTION\n");break;
11232 	case XML_PARSER_END_TAG:
11233 	    xmlGenericError(xmlGenericErrorContext,
11234 		    "PP: try END_TAG\n");break;
11235 	case XML_PARSER_ENTITY_DECL:
11236 	    xmlGenericError(xmlGenericErrorContext,
11237 		    "PP: try ENTITY_DECL\n");break;
11238 	case XML_PARSER_ENTITY_VALUE:
11239 	    xmlGenericError(xmlGenericErrorContext,
11240 		    "PP: try ENTITY_VALUE\n");break;
11241 	case XML_PARSER_ATTRIBUTE_VALUE:
11242 	    xmlGenericError(xmlGenericErrorContext,
11243 		    "PP: try ATTRIBUTE_VALUE\n");break;
11244 	case XML_PARSER_DTD:
11245 	    xmlGenericError(xmlGenericErrorContext,
11246 		    "PP: try DTD\n");break;
11247 	case XML_PARSER_EPILOG:
11248 	    xmlGenericError(xmlGenericErrorContext,
11249 		    "PP: try EPILOG\n");break;
11250 	case XML_PARSER_PI:
11251 	    xmlGenericError(xmlGenericErrorContext,
11252 		    "PP: try PI\n");break;
11253         case XML_PARSER_IGNORE:
11254             xmlGenericError(xmlGenericErrorContext,
11255 		    "PP: try IGNORE\n");break;
11256     }
11257 #endif
11258 
11259     if ((ctxt->input != NULL) &&
11260         (ctxt->input->cur - ctxt->input->base > 4096)) {
11261 	xmlSHRINK(ctxt);
11262 	ctxt->checkIndex = 0;
11263     }
11264     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11265 
11266     while (ctxt->instate != XML_PARSER_EOF) {
11267 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11268 	    return(0);
11269 
11270 	if (ctxt->input == NULL) break;
11271 	if (ctxt->input->buf == NULL)
11272 	    avail = ctxt->input->length -
11273 	            (ctxt->input->cur - ctxt->input->base);
11274 	else {
11275 	    /*
11276 	     * If we are operating on converted input, try to flush
11277 	     * remaining chars to avoid them stalling in the non-converted
11278 	     * buffer. But do not do this in document start where
11279 	     * encoding="..." may not have been read and we work on a
11280 	     * guessed encoding.
11281 	     */
11282 	    if ((ctxt->instate != XML_PARSER_START) &&
11283 	        (ctxt->input->buf->raw != NULL) &&
11284 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11285                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11286                                                  ctxt->input);
11287 		size_t current = ctxt->input->cur - ctxt->input->base;
11288 
11289 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11290                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11291                                       base, current);
11292 	    }
11293 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11294 		    (ctxt->input->cur - ctxt->input->base);
11295 	}
11296         if (avail < 1)
11297 	    goto done;
11298         switch (ctxt->instate) {
11299             case XML_PARSER_EOF:
11300 	        /*
11301 		 * Document parsing is done !
11302 		 */
11303 	        goto done;
11304             case XML_PARSER_START:
11305 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11306 		    xmlChar start[4];
11307 		    xmlCharEncoding enc;
11308 
11309 		    /*
11310 		     * Very first chars read from the document flow.
11311 		     */
11312 		    if (avail < 4)
11313 			goto done;
11314 
11315 		    /*
11316 		     * Get the 4 first bytes and decode the charset
11317 		     * if enc != XML_CHAR_ENCODING_NONE
11318 		     * plug some encoding conversion routines,
11319 		     * else xmlSwitchEncoding will set to (default)
11320 		     * UTF8.
11321 		     */
11322 		    start[0] = RAW;
11323 		    start[1] = NXT(1);
11324 		    start[2] = NXT(2);
11325 		    start[3] = NXT(3);
11326 		    enc = xmlDetectCharEncoding(start, 4);
11327 		    xmlSwitchEncoding(ctxt, enc);
11328 		    break;
11329 		}
11330 
11331 		if (avail < 2)
11332 		    goto done;
11333 		cur = ctxt->input->cur[0];
11334 		next = ctxt->input->cur[1];
11335 		if (cur == 0) {
11336 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11337 			ctxt->sax->setDocumentLocator(ctxt->userData,
11338 						      &xmlDefaultSAXLocator);
11339 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11340 		    xmlHaltParser(ctxt);
11341 #ifdef DEBUG_PUSH
11342 		    xmlGenericError(xmlGenericErrorContext,
11343 			    "PP: entering EOF\n");
11344 #endif
11345 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11346 			ctxt->sax->endDocument(ctxt->userData);
11347 		    goto done;
11348 		}
11349 	        if ((cur == '<') && (next == '?')) {
11350 		    /* PI or XML decl */
11351 		    if (avail < 5) return(ret);
11352 		    if ((!terminate) &&
11353 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11354 			return(ret);
11355 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11356 			ctxt->sax->setDocumentLocator(ctxt->userData,
11357 						      &xmlDefaultSAXLocator);
11358 		    if ((ctxt->input->cur[2] == 'x') &&
11359 			(ctxt->input->cur[3] == 'm') &&
11360 			(ctxt->input->cur[4] == 'l') &&
11361 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11362 			ret += 5;
11363 #ifdef DEBUG_PUSH
11364 			xmlGenericError(xmlGenericErrorContext,
11365 				"PP: Parsing XML Decl\n");
11366 #endif
11367 			xmlParseXMLDecl(ctxt);
11368 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11369 			    /*
11370 			     * The XML REC instructs us to stop parsing right
11371 			     * here
11372 			     */
11373 			    xmlHaltParser(ctxt);
11374 			    return(0);
11375 			}
11376 			ctxt->standalone = ctxt->input->standalone;
11377 			if ((ctxt->encoding == NULL) &&
11378 			    (ctxt->input->encoding != NULL))
11379 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11380 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11381 			    (!ctxt->disableSAX))
11382 			    ctxt->sax->startDocument(ctxt->userData);
11383 			ctxt->instate = XML_PARSER_MISC;
11384 #ifdef DEBUG_PUSH
11385 			xmlGenericError(xmlGenericErrorContext,
11386 				"PP: entering MISC\n");
11387 #endif
11388 		    } else {
11389 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11390 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11391 			    (!ctxt->disableSAX))
11392 			    ctxt->sax->startDocument(ctxt->userData);
11393 			ctxt->instate = XML_PARSER_MISC;
11394 #ifdef DEBUG_PUSH
11395 			xmlGenericError(xmlGenericErrorContext,
11396 				"PP: entering MISC\n");
11397 #endif
11398 		    }
11399 		} else {
11400 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11401 			ctxt->sax->setDocumentLocator(ctxt->userData,
11402 						      &xmlDefaultSAXLocator);
11403 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11404 		    if (ctxt->version == NULL) {
11405 		        xmlErrMemory(ctxt, NULL);
11406 			break;
11407 		    }
11408 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11409 		        (!ctxt->disableSAX))
11410 			ctxt->sax->startDocument(ctxt->userData);
11411 		    ctxt->instate = XML_PARSER_MISC;
11412 #ifdef DEBUG_PUSH
11413 		    xmlGenericError(xmlGenericErrorContext,
11414 			    "PP: entering MISC\n");
11415 #endif
11416 		}
11417 		break;
11418             case XML_PARSER_START_TAG: {
11419 	        const xmlChar *name;
11420 		const xmlChar *prefix = NULL;
11421 		const xmlChar *URI = NULL;
11422                 int line = ctxt->input->line;
11423 		int nsNr = ctxt->nsNr;
11424 
11425 		if ((avail < 2) && (ctxt->inputNr == 1))
11426 		    goto done;
11427 		cur = ctxt->input->cur[0];
11428 	        if (cur != '<') {
11429 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11430 		    xmlHaltParser(ctxt);
11431 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11432 			ctxt->sax->endDocument(ctxt->userData);
11433 		    goto done;
11434 		}
11435 		if (!terminate) {
11436 		    if (ctxt->progressive) {
11437 		        /* > can be found unescaped in attribute values */
11438 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11439 			    goto done;
11440 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11441 			goto done;
11442 		    }
11443 		}
11444 		if (ctxt->spaceNr == 0)
11445 		    spacePush(ctxt, -1);
11446 		else if (*ctxt->space == -2)
11447 		    spacePush(ctxt, -1);
11448 		else
11449 		    spacePush(ctxt, *ctxt->space);
11450 #ifdef LIBXML_SAX1_ENABLED
11451 		if (ctxt->sax2)
11452 #endif /* LIBXML_SAX1_ENABLED */
11453 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11454 #ifdef LIBXML_SAX1_ENABLED
11455 		else
11456 		    name = xmlParseStartTag(ctxt);
11457 #endif /* LIBXML_SAX1_ENABLED */
11458 		if (ctxt->instate == XML_PARSER_EOF)
11459 		    goto done;
11460 		if (name == NULL) {
11461 		    spacePop(ctxt);
11462 		    xmlHaltParser(ctxt);
11463 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11464 			ctxt->sax->endDocument(ctxt->userData);
11465 		    goto done;
11466 		}
11467 #ifdef LIBXML_VALID_ENABLED
11468 		/*
11469 		 * [ VC: Root Element Type ]
11470 		 * The Name in the document type declaration must match
11471 		 * the element type of the root element.
11472 		 */
11473 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11474 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11475 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11476 #endif /* LIBXML_VALID_ENABLED */
11477 
11478 		/*
11479 		 * Check for an Empty Element.
11480 		 */
11481 		if ((RAW == '/') && (NXT(1) == '>')) {
11482 		    SKIP(2);
11483 
11484 		    if (ctxt->sax2) {
11485 			if ((ctxt->sax != NULL) &&
11486 			    (ctxt->sax->endElementNs != NULL) &&
11487 			    (!ctxt->disableSAX))
11488 			    ctxt->sax->endElementNs(ctxt->userData, name,
11489 			                            prefix, URI);
11490 			if (ctxt->nsNr - nsNr > 0)
11491 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11492 #ifdef LIBXML_SAX1_ENABLED
11493 		    } else {
11494 			if ((ctxt->sax != NULL) &&
11495 			    (ctxt->sax->endElement != NULL) &&
11496 			    (!ctxt->disableSAX))
11497 			    ctxt->sax->endElement(ctxt->userData, name);
11498 #endif /* LIBXML_SAX1_ENABLED */
11499 		    }
11500 		    if (ctxt->instate == XML_PARSER_EOF)
11501 			goto done;
11502 		    spacePop(ctxt);
11503 		    if (ctxt->nameNr == 0) {
11504 			ctxt->instate = XML_PARSER_EPILOG;
11505 		    } else {
11506 			ctxt->instate = XML_PARSER_CONTENT;
11507 		    }
11508                     ctxt->progressive = 1;
11509 		    break;
11510 		}
11511 		if (RAW == '>') {
11512 		    NEXT;
11513 		} else {
11514 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11515 					 "Couldn't find end of Start Tag %s\n",
11516 					 name);
11517 		    nodePop(ctxt);
11518 		    spacePop(ctxt);
11519 		}
11520                 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11521 
11522 		ctxt->instate = XML_PARSER_CONTENT;
11523                 ctxt->progressive = 1;
11524                 break;
11525 	    }
11526             case XML_PARSER_CONTENT: {
11527 		const xmlChar *test;
11528 		unsigned int cons;
11529 		if ((avail < 2) && (ctxt->inputNr == 1))
11530 		    goto done;
11531 		cur = ctxt->input->cur[0];
11532 		next = ctxt->input->cur[1];
11533 
11534 		test = CUR_PTR;
11535 	        cons = ctxt->input->consumed;
11536 		if ((cur == '<') && (next == '/')) {
11537 		    ctxt->instate = XML_PARSER_END_TAG;
11538 		    break;
11539 	        } else if ((cur == '<') && (next == '?')) {
11540 		    if ((!terminate) &&
11541 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11542                         ctxt->progressive = XML_PARSER_PI;
11543 			goto done;
11544                     }
11545 		    xmlParsePI(ctxt);
11546 		    ctxt->instate = XML_PARSER_CONTENT;
11547                     ctxt->progressive = 1;
11548 		} else if ((cur == '<') && (next != '!')) {
11549 		    ctxt->instate = XML_PARSER_START_TAG;
11550 		    break;
11551 		} else if ((cur == '<') && (next == '!') &&
11552 		           (ctxt->input->cur[2] == '-') &&
11553 			   (ctxt->input->cur[3] == '-')) {
11554 		    int term;
11555 
11556 	            if (avail < 4)
11557 		        goto done;
11558 		    ctxt->input->cur += 4;
11559 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11560 		    ctxt->input->cur -= 4;
11561 		    if ((!terminate) && (term < 0)) {
11562                         ctxt->progressive = XML_PARSER_COMMENT;
11563 			goto done;
11564                     }
11565 		    xmlParseComment(ctxt);
11566 		    ctxt->instate = XML_PARSER_CONTENT;
11567                     ctxt->progressive = 1;
11568 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11569 		    (ctxt->input->cur[2] == '[') &&
11570 		    (ctxt->input->cur[3] == 'C') &&
11571 		    (ctxt->input->cur[4] == 'D') &&
11572 		    (ctxt->input->cur[5] == 'A') &&
11573 		    (ctxt->input->cur[6] == 'T') &&
11574 		    (ctxt->input->cur[7] == 'A') &&
11575 		    (ctxt->input->cur[8] == '[')) {
11576 		    SKIP(9);
11577 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11578 		    break;
11579 		} else if ((cur == '<') && (next == '!') &&
11580 		           (avail < 9)) {
11581 		    goto done;
11582 		} else if (cur == '&') {
11583 		    if ((!terminate) &&
11584 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11585 			goto done;
11586 		    xmlParseReference(ctxt);
11587 		} else {
11588 		    /* TODO Avoid the extra copy, handle directly !!! */
11589 		    /*
11590 		     * Goal of the following test is:
11591 		     *  - minimize calls to the SAX 'character' callback
11592 		     *    when they are mergeable
11593 		     *  - handle an problem for isBlank when we only parse
11594 		     *    a sequence of blank chars and the next one is
11595 		     *    not available to check against '<' presence.
11596 		     *  - tries to homogenize the differences in SAX
11597 		     *    callbacks between the push and pull versions
11598 		     *    of the parser.
11599 		     */
11600 		    if ((ctxt->inputNr == 1) &&
11601 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11602 			if (!terminate) {
11603 			    if (ctxt->progressive) {
11604 				if ((lastlt == NULL) ||
11605 				    (ctxt->input->cur > lastlt))
11606 				    goto done;
11607 			    } else if (xmlParseLookupSequence(ctxt,
11608 			                                      '<', 0, 0) < 0) {
11609 				goto done;
11610 			    }
11611 			}
11612                     }
11613 		    ctxt->checkIndex = 0;
11614 		    xmlParseCharData(ctxt, 0);
11615 		}
11616 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11617 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11618 		                "detected an error in element content\n");
11619 		    xmlHaltParser(ctxt);
11620 		    break;
11621 		}
11622 		break;
11623 	    }
11624             case XML_PARSER_END_TAG:
11625 		if (avail < 2)
11626 		    goto done;
11627 		if (!terminate) {
11628 		    if (ctxt->progressive) {
11629 		        /* > can be found unescaped in attribute values */
11630 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11631 			    goto done;
11632 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11633 			goto done;
11634 		    }
11635 		}
11636 		if (ctxt->sax2) {
11637 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11638 		    nameNsPop(ctxt);
11639 		}
11640 #ifdef LIBXML_SAX1_ENABLED
11641 		  else
11642 		    xmlParseEndTag1(ctxt, 0);
11643 #endif /* LIBXML_SAX1_ENABLED */
11644 		if (ctxt->instate == XML_PARSER_EOF) {
11645 		    /* Nothing */
11646 		} else if (ctxt->nameNr == 0) {
11647 		    ctxt->instate = XML_PARSER_EPILOG;
11648 		} else {
11649 		    ctxt->instate = XML_PARSER_CONTENT;
11650 		}
11651 		break;
11652             case XML_PARSER_CDATA_SECTION: {
11653 	        /*
11654 		 * The Push mode need to have the SAX callback for
11655 		 * cdataBlock merge back contiguous callbacks.
11656 		 */
11657 		int base;
11658 
11659 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11660 		if (base < 0) {
11661 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11662 		        int tmp;
11663 
11664 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11665 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11666 			if (tmp < 0) {
11667 			    tmp = -tmp;
11668 			    ctxt->input->cur += tmp;
11669 			    goto encoding_error;
11670 			}
11671 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11672 			    if (ctxt->sax->cdataBlock != NULL)
11673 				ctxt->sax->cdataBlock(ctxt->userData,
11674 				                      ctxt->input->cur, tmp);
11675 			    else if (ctxt->sax->characters != NULL)
11676 				ctxt->sax->characters(ctxt->userData,
11677 				                      ctxt->input->cur, tmp);
11678 			}
11679 			if (ctxt->instate == XML_PARSER_EOF)
11680 			    goto done;
11681 			SKIPL(tmp);
11682 			ctxt->checkIndex = 0;
11683 		    }
11684 		    goto done;
11685 		} else {
11686 		    int tmp;
11687 
11688 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11689 		    if ((tmp < 0) || (tmp != base)) {
11690 			tmp = -tmp;
11691 			ctxt->input->cur += tmp;
11692 			goto encoding_error;
11693 		    }
11694 		    if ((ctxt->sax != NULL) && (base == 0) &&
11695 		        (ctxt->sax->cdataBlock != NULL) &&
11696 		        (!ctxt->disableSAX)) {
11697 			/*
11698 			 * Special case to provide identical behaviour
11699 			 * between pull and push parsers on enpty CDATA
11700 			 * sections
11701 			 */
11702 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11703 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11704 			               "<![CDATA[", 9)))
11705 			     ctxt->sax->cdataBlock(ctxt->userData,
11706 			                           BAD_CAST "", 0);
11707 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11708 			(!ctxt->disableSAX)) {
11709 			if (ctxt->sax->cdataBlock != NULL)
11710 			    ctxt->sax->cdataBlock(ctxt->userData,
11711 						  ctxt->input->cur, base);
11712 			else if (ctxt->sax->characters != NULL)
11713 			    ctxt->sax->characters(ctxt->userData,
11714 						  ctxt->input->cur, base);
11715 		    }
11716 		    if (ctxt->instate == XML_PARSER_EOF)
11717 			goto done;
11718 		    SKIPL(base + 3);
11719 		    ctxt->checkIndex = 0;
11720 		    ctxt->instate = XML_PARSER_CONTENT;
11721 #ifdef DEBUG_PUSH
11722 		    xmlGenericError(xmlGenericErrorContext,
11723 			    "PP: entering CONTENT\n");
11724 #endif
11725 		}
11726 		break;
11727 	    }
11728             case XML_PARSER_MISC:
11729 		SKIP_BLANKS;
11730 		if (ctxt->input->buf == NULL)
11731 		    avail = ctxt->input->length -
11732 		            (ctxt->input->cur - ctxt->input->base);
11733 		else
11734 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11735 		            (ctxt->input->cur - ctxt->input->base);
11736 		if (avail < 2)
11737 		    goto done;
11738 		cur = ctxt->input->cur[0];
11739 		next = ctxt->input->cur[1];
11740 	        if ((cur == '<') && (next == '?')) {
11741 		    if ((!terminate) &&
11742 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11743                         ctxt->progressive = XML_PARSER_PI;
11744 			goto done;
11745                     }
11746 #ifdef DEBUG_PUSH
11747 		    xmlGenericError(xmlGenericErrorContext,
11748 			    "PP: Parsing PI\n");
11749 #endif
11750 		    xmlParsePI(ctxt);
11751 		    if (ctxt->instate == XML_PARSER_EOF)
11752 			goto done;
11753 		    ctxt->instate = XML_PARSER_MISC;
11754                     ctxt->progressive = 1;
11755 		    ctxt->checkIndex = 0;
11756 		} else if ((cur == '<') && (next == '!') &&
11757 		    (ctxt->input->cur[2] == '-') &&
11758 		    (ctxt->input->cur[3] == '-')) {
11759 		    if ((!terminate) &&
11760 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11761                         ctxt->progressive = XML_PARSER_COMMENT;
11762 			goto done;
11763                     }
11764 #ifdef DEBUG_PUSH
11765 		    xmlGenericError(xmlGenericErrorContext,
11766 			    "PP: Parsing Comment\n");
11767 #endif
11768 		    xmlParseComment(ctxt);
11769 		    if (ctxt->instate == XML_PARSER_EOF)
11770 			goto done;
11771 		    ctxt->instate = XML_PARSER_MISC;
11772                     ctxt->progressive = 1;
11773 		    ctxt->checkIndex = 0;
11774 		} else if ((cur == '<') && (next == '!') &&
11775 		    (ctxt->input->cur[2] == 'D') &&
11776 		    (ctxt->input->cur[3] == 'O') &&
11777 		    (ctxt->input->cur[4] == 'C') &&
11778 		    (ctxt->input->cur[5] == 'T') &&
11779 		    (ctxt->input->cur[6] == 'Y') &&
11780 		    (ctxt->input->cur[7] == 'P') &&
11781 		    (ctxt->input->cur[8] == 'E')) {
11782 		    if ((!terminate) &&
11783 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11784                         ctxt->progressive = XML_PARSER_DTD;
11785 			goto done;
11786                     }
11787 #ifdef DEBUG_PUSH
11788 		    xmlGenericError(xmlGenericErrorContext,
11789 			    "PP: Parsing internal subset\n");
11790 #endif
11791 		    ctxt->inSubset = 1;
11792                     ctxt->progressive = 0;
11793 		    ctxt->checkIndex = 0;
11794 		    xmlParseDocTypeDecl(ctxt);
11795 		    if (ctxt->instate == XML_PARSER_EOF)
11796 			goto done;
11797 		    if (RAW == '[') {
11798 			ctxt->instate = XML_PARSER_DTD;
11799 #ifdef DEBUG_PUSH
11800 			xmlGenericError(xmlGenericErrorContext,
11801 				"PP: entering DTD\n");
11802 #endif
11803 		    } else {
11804 			/*
11805 			 * Create and update the external subset.
11806 			 */
11807 			ctxt->inSubset = 2;
11808 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11809 			    (ctxt->sax->externalSubset != NULL))
11810 			    ctxt->sax->externalSubset(ctxt->userData,
11811 				    ctxt->intSubName, ctxt->extSubSystem,
11812 				    ctxt->extSubURI);
11813 			ctxt->inSubset = 0;
11814 			xmlCleanSpecialAttr(ctxt);
11815 			ctxt->instate = XML_PARSER_PROLOG;
11816 #ifdef DEBUG_PUSH
11817 			xmlGenericError(xmlGenericErrorContext,
11818 				"PP: entering PROLOG\n");
11819 #endif
11820 		    }
11821 		} else if ((cur == '<') && (next == '!') &&
11822 		           (avail < 9)) {
11823 		    goto done;
11824 		} else {
11825 		    ctxt->instate = XML_PARSER_START_TAG;
11826 		    ctxt->progressive = XML_PARSER_START_TAG;
11827 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11828 #ifdef DEBUG_PUSH
11829 		    xmlGenericError(xmlGenericErrorContext,
11830 			    "PP: entering START_TAG\n");
11831 #endif
11832 		}
11833 		break;
11834             case XML_PARSER_PROLOG:
11835 		SKIP_BLANKS;
11836 		if (ctxt->input->buf == NULL)
11837 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11838 		else
11839 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11840                             (ctxt->input->cur - ctxt->input->base);
11841 		if (avail < 2)
11842 		    goto done;
11843 		cur = ctxt->input->cur[0];
11844 		next = ctxt->input->cur[1];
11845 	        if ((cur == '<') && (next == '?')) {
11846 		    if ((!terminate) &&
11847 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11848                         ctxt->progressive = XML_PARSER_PI;
11849 			goto done;
11850                     }
11851 #ifdef DEBUG_PUSH
11852 		    xmlGenericError(xmlGenericErrorContext,
11853 			    "PP: Parsing PI\n");
11854 #endif
11855 		    xmlParsePI(ctxt);
11856 		    if (ctxt->instate == XML_PARSER_EOF)
11857 			goto done;
11858 		    ctxt->instate = XML_PARSER_PROLOG;
11859                     ctxt->progressive = 1;
11860 		} else if ((cur == '<') && (next == '!') &&
11861 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11862 		    if ((!terminate) &&
11863 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11864                         ctxt->progressive = XML_PARSER_COMMENT;
11865 			goto done;
11866                     }
11867 #ifdef DEBUG_PUSH
11868 		    xmlGenericError(xmlGenericErrorContext,
11869 			    "PP: Parsing Comment\n");
11870 #endif
11871 		    xmlParseComment(ctxt);
11872 		    if (ctxt->instate == XML_PARSER_EOF)
11873 			goto done;
11874 		    ctxt->instate = XML_PARSER_PROLOG;
11875                     ctxt->progressive = 1;
11876 		} else if ((cur == '<') && (next == '!') &&
11877 		           (avail < 4)) {
11878 		    goto done;
11879 		} else {
11880 		    ctxt->instate = XML_PARSER_START_TAG;
11881 		    if (ctxt->progressive == 0)
11882 			ctxt->progressive = XML_PARSER_START_TAG;
11883 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11884 #ifdef DEBUG_PUSH
11885 		    xmlGenericError(xmlGenericErrorContext,
11886 			    "PP: entering START_TAG\n");
11887 #endif
11888 		}
11889 		break;
11890             case XML_PARSER_EPILOG:
11891 		SKIP_BLANKS;
11892 		if (ctxt->input->buf == NULL)
11893 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11894 		else
11895 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11896                             (ctxt->input->cur - ctxt->input->base);
11897 		if (avail < 2)
11898 		    goto done;
11899 		cur = ctxt->input->cur[0];
11900 		next = ctxt->input->cur[1];
11901 	        if ((cur == '<') && (next == '?')) {
11902 		    if ((!terminate) &&
11903 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11904                         ctxt->progressive = XML_PARSER_PI;
11905 			goto done;
11906                     }
11907 #ifdef DEBUG_PUSH
11908 		    xmlGenericError(xmlGenericErrorContext,
11909 			    "PP: Parsing PI\n");
11910 #endif
11911 		    xmlParsePI(ctxt);
11912 		    if (ctxt->instate == XML_PARSER_EOF)
11913 			goto done;
11914 		    ctxt->instate = XML_PARSER_EPILOG;
11915                     ctxt->progressive = 1;
11916 		} else if ((cur == '<') && (next == '!') &&
11917 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11918 		    if ((!terminate) &&
11919 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11920                         ctxt->progressive = XML_PARSER_COMMENT;
11921 			goto done;
11922                     }
11923 #ifdef DEBUG_PUSH
11924 		    xmlGenericError(xmlGenericErrorContext,
11925 			    "PP: Parsing Comment\n");
11926 #endif
11927 		    xmlParseComment(ctxt);
11928 		    if (ctxt->instate == XML_PARSER_EOF)
11929 			goto done;
11930 		    ctxt->instate = XML_PARSER_EPILOG;
11931                     ctxt->progressive = 1;
11932 		} else if ((cur == '<') && (next == '!') &&
11933 		           (avail < 4)) {
11934 		    goto done;
11935 		} else {
11936 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11937 		    xmlHaltParser(ctxt);
11938 #ifdef DEBUG_PUSH
11939 		    xmlGenericError(xmlGenericErrorContext,
11940 			    "PP: entering EOF\n");
11941 #endif
11942 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11943 			ctxt->sax->endDocument(ctxt->userData);
11944 		    goto done;
11945 		}
11946 		break;
11947             case XML_PARSER_DTD: {
11948 	        /*
11949 		 * Sorry but progressive parsing of the internal subset
11950 		 * is not expected to be supported. We first check that
11951 		 * the full content of the internal subset is available and
11952 		 * the parsing is launched only at that point.
11953 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11954 		 * section and not in a ']]>' sequence which are conditional
11955 		 * sections (whoever argued to keep that crap in XML deserve
11956 		 * a place in hell !).
11957 		 */
11958 		int base, i;
11959 		xmlChar *buf;
11960 	        xmlChar quote = 0;
11961                 size_t use;
11962 
11963 		base = ctxt->input->cur - ctxt->input->base;
11964 		if (base < 0) return(0);
11965 		if (ctxt->checkIndex > base)
11966 		    base = ctxt->checkIndex;
11967 		buf = xmlBufContent(ctxt->input->buf->buffer);
11968                 use = xmlBufUse(ctxt->input->buf->buffer);
11969 		for (;(unsigned int) base < use; base++) {
11970 		    if (quote != 0) {
11971 		        if (buf[base] == quote)
11972 			    quote = 0;
11973 			continue;
11974 		    }
11975 		    if ((quote == 0) && (buf[base] == '<')) {
11976 		        int found  = 0;
11977 			/* special handling of comments */
11978 		        if (((unsigned int) base + 4 < use) &&
11979 			    (buf[base + 1] == '!') &&
11980 			    (buf[base + 2] == '-') &&
11981 			    (buf[base + 3] == '-')) {
11982 			    for (;(unsigned int) base + 3 < use; base++) {
11983 				if ((buf[base] == '-') &&
11984 				    (buf[base + 1] == '-') &&
11985 				    (buf[base + 2] == '>')) {
11986 				    found = 1;
11987 				    base += 2;
11988 				    break;
11989 				}
11990 		            }
11991 			    if (!found) {
11992 #if 0
11993 			        fprintf(stderr, "unfinished comment\n");
11994 #endif
11995 			        break; /* for */
11996 		            }
11997 		            continue;
11998 			}
11999 		    }
12000 		    if (buf[base] == '"') {
12001 		        quote = '"';
12002 			continue;
12003 		    }
12004 		    if (buf[base] == '\'') {
12005 		        quote = '\'';
12006 			continue;
12007 		    }
12008 		    if (buf[base] == ']') {
12009 #if 0
12010 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
12011 			        buf[base + 1], buf[base + 2], buf[base + 3]);
12012 #endif
12013 		        if ((unsigned int) base +1 >= use)
12014 			    break;
12015 			if (buf[base + 1] == ']') {
12016 			    /* conditional crap, skip both ']' ! */
12017 			    base++;
12018 			    continue;
12019 			}
12020 		        for (i = 1; (unsigned int) base + i < use; i++) {
12021 			    if (buf[base + i] == '>') {
12022 #if 0
12023 			        fprintf(stderr, "found\n");
12024 #endif
12025 			        goto found_end_int_subset;
12026 			    }
12027 			    if (!IS_BLANK_CH(buf[base + i])) {
12028 #if 0
12029 			        fprintf(stderr, "not found\n");
12030 #endif
12031 			        goto not_end_of_int_subset;
12032 			    }
12033 			}
12034 #if 0
12035 			fprintf(stderr, "end of stream\n");
12036 #endif
12037 		        break;
12038 
12039 		    }
12040 not_end_of_int_subset:
12041                     continue; /* for */
12042 		}
12043 		/*
12044 		 * We didn't found the end of the Internal subset
12045 		 */
12046                 if (quote == 0)
12047                     ctxt->checkIndex = base;
12048                 else
12049                     ctxt->checkIndex = 0;
12050 #ifdef DEBUG_PUSH
12051 		if (next == 0)
12052 		    xmlGenericError(xmlGenericErrorContext,
12053 			    "PP: lookup of int subset end filed\n");
12054 #endif
12055 	        goto done;
12056 
12057 found_end_int_subset:
12058                 ctxt->checkIndex = 0;
12059 		xmlParseInternalSubset(ctxt);
12060 		if (ctxt->instate == XML_PARSER_EOF)
12061 		    goto done;
12062 		ctxt->inSubset = 2;
12063 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12064 		    (ctxt->sax->externalSubset != NULL))
12065 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12066 			    ctxt->extSubSystem, ctxt->extSubURI);
12067 		ctxt->inSubset = 0;
12068 		xmlCleanSpecialAttr(ctxt);
12069 		if (ctxt->instate == XML_PARSER_EOF)
12070 		    goto done;
12071 		ctxt->instate = XML_PARSER_PROLOG;
12072 		ctxt->checkIndex = 0;
12073 #ifdef DEBUG_PUSH
12074 		xmlGenericError(xmlGenericErrorContext,
12075 			"PP: entering PROLOG\n");
12076 #endif
12077                 break;
12078 	    }
12079             case XML_PARSER_COMMENT:
12080 		xmlGenericError(xmlGenericErrorContext,
12081 			"PP: internal error, state == COMMENT\n");
12082 		ctxt->instate = XML_PARSER_CONTENT;
12083 #ifdef DEBUG_PUSH
12084 		xmlGenericError(xmlGenericErrorContext,
12085 			"PP: entering CONTENT\n");
12086 #endif
12087 		break;
12088             case XML_PARSER_IGNORE:
12089 		xmlGenericError(xmlGenericErrorContext,
12090 			"PP: internal error, state == IGNORE");
12091 	        ctxt->instate = XML_PARSER_DTD;
12092 #ifdef DEBUG_PUSH
12093 		xmlGenericError(xmlGenericErrorContext,
12094 			"PP: entering DTD\n");
12095 #endif
12096 	        break;
12097             case XML_PARSER_PI:
12098 		xmlGenericError(xmlGenericErrorContext,
12099 			"PP: internal error, state == PI\n");
12100 		ctxt->instate = XML_PARSER_CONTENT;
12101 #ifdef DEBUG_PUSH
12102 		xmlGenericError(xmlGenericErrorContext,
12103 			"PP: entering CONTENT\n");
12104 #endif
12105 		break;
12106             case XML_PARSER_ENTITY_DECL:
12107 		xmlGenericError(xmlGenericErrorContext,
12108 			"PP: internal error, state == ENTITY_DECL\n");
12109 		ctxt->instate = XML_PARSER_DTD;
12110 #ifdef DEBUG_PUSH
12111 		xmlGenericError(xmlGenericErrorContext,
12112 			"PP: entering DTD\n");
12113 #endif
12114 		break;
12115             case XML_PARSER_ENTITY_VALUE:
12116 		xmlGenericError(xmlGenericErrorContext,
12117 			"PP: internal error, state == ENTITY_VALUE\n");
12118 		ctxt->instate = XML_PARSER_CONTENT;
12119 #ifdef DEBUG_PUSH
12120 		xmlGenericError(xmlGenericErrorContext,
12121 			"PP: entering DTD\n");
12122 #endif
12123 		break;
12124             case XML_PARSER_ATTRIBUTE_VALUE:
12125 		xmlGenericError(xmlGenericErrorContext,
12126 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12127 		ctxt->instate = XML_PARSER_START_TAG;
12128 #ifdef DEBUG_PUSH
12129 		xmlGenericError(xmlGenericErrorContext,
12130 			"PP: entering START_TAG\n");
12131 #endif
12132 		break;
12133             case XML_PARSER_SYSTEM_LITERAL:
12134 		xmlGenericError(xmlGenericErrorContext,
12135 			"PP: internal error, state == SYSTEM_LITERAL\n");
12136 		ctxt->instate = XML_PARSER_START_TAG;
12137 #ifdef DEBUG_PUSH
12138 		xmlGenericError(xmlGenericErrorContext,
12139 			"PP: entering START_TAG\n");
12140 #endif
12141 		break;
12142             case XML_PARSER_PUBLIC_LITERAL:
12143 		xmlGenericError(xmlGenericErrorContext,
12144 			"PP: internal error, state == PUBLIC_LITERAL\n");
12145 		ctxt->instate = XML_PARSER_START_TAG;
12146 #ifdef DEBUG_PUSH
12147 		xmlGenericError(xmlGenericErrorContext,
12148 			"PP: entering START_TAG\n");
12149 #endif
12150 		break;
12151 	}
12152     }
12153 done:
12154 #ifdef DEBUG_PUSH
12155     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12156 #endif
12157     return(ret);
12158 encoding_error:
12159     {
12160         char buffer[150];
12161 
12162 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12163 			ctxt->input->cur[0], ctxt->input->cur[1],
12164 			ctxt->input->cur[2], ctxt->input->cur[3]);
12165 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12166 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12167 		     BAD_CAST buffer, NULL);
12168     }
12169     return(0);
12170 }
12171 
12172 /**
12173  * xmlParseCheckTransition:
12174  * @ctxt:  an XML parser context
12175  * @chunk:  a char array
12176  * @size:  the size in byte of the chunk
12177  *
12178  * Check depending on the current parser state if the chunk given must be
12179  * processed immediately or one need more data to advance on parsing.
12180  *
12181  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12182  */
12183 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12184 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12185     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12186         return(-1);
12187     if (ctxt->instate == XML_PARSER_START_TAG) {
12188         if (memchr(chunk, '>', size) != NULL)
12189             return(1);
12190         return(0);
12191     }
12192     if (ctxt->progressive == XML_PARSER_COMMENT) {
12193         if (memchr(chunk, '>', size) != NULL)
12194             return(1);
12195         return(0);
12196     }
12197     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12198         if (memchr(chunk, '>', size) != NULL)
12199             return(1);
12200         return(0);
12201     }
12202     if (ctxt->progressive == XML_PARSER_PI) {
12203         if (memchr(chunk, '>', size) != NULL)
12204             return(1);
12205         return(0);
12206     }
12207     if (ctxt->instate == XML_PARSER_END_TAG) {
12208         if (memchr(chunk, '>', size) != NULL)
12209             return(1);
12210         return(0);
12211     }
12212     if ((ctxt->progressive == XML_PARSER_DTD) ||
12213         (ctxt->instate == XML_PARSER_DTD)) {
12214         if (memchr(chunk, '>', size) != NULL)
12215             return(1);
12216         return(0);
12217     }
12218     return(1);
12219 }
12220 
12221 /**
12222  * xmlParseChunk:
12223  * @ctxt:  an XML parser context
12224  * @chunk:  an char array
12225  * @size:  the size in byte of the chunk
12226  * @terminate:  last chunk indicator
12227  *
12228  * Parse a Chunk of memory
12229  *
12230  * Returns zero if no error, the xmlParserErrors otherwise.
12231  */
12232 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12233 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12234               int terminate) {
12235     int end_in_lf = 0;
12236     int remain = 0;
12237     size_t old_avail = 0;
12238     size_t avail = 0;
12239 
12240     if (ctxt == NULL)
12241         return(XML_ERR_INTERNAL_ERROR);
12242     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12243         return(ctxt->errNo);
12244     if (ctxt->instate == XML_PARSER_EOF)
12245         return(-1);
12246     if (ctxt->instate == XML_PARSER_START)
12247         xmlDetectSAX2(ctxt);
12248     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12249         (chunk[size - 1] == '\r')) {
12250 	end_in_lf = 1;
12251 	size--;
12252     }
12253 
12254 xmldecl_done:
12255 
12256     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12257         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12258 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12259 	size_t cur = ctxt->input->cur - ctxt->input->base;
12260 	int res;
12261 
12262         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12263         /*
12264          * Specific handling if we autodetected an encoding, we should not
12265          * push more than the first line ... which depend on the encoding
12266          * And only push the rest once the final encoding was detected
12267          */
12268         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12269             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12270             unsigned int len = 45;
12271 
12272             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12273                                BAD_CAST "UTF-16")) ||
12274                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12275                                BAD_CAST "UTF16")))
12276                 len = 90;
12277             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278                                     BAD_CAST "UCS-4")) ||
12279                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280                                     BAD_CAST "UCS4")))
12281                 len = 180;
12282 
12283             if (ctxt->input->buf->rawconsumed < len)
12284                 len -= ctxt->input->buf->rawconsumed;
12285 
12286             /*
12287              * Change size for reading the initial declaration only
12288              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12289              * will blindly copy extra bytes from memory.
12290              */
12291             if ((unsigned int) size > len) {
12292                 remain = size - len;
12293                 size = len;
12294             } else {
12295                 remain = 0;
12296             }
12297         }
12298 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12299         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12300 	if (res < 0) {
12301 	    ctxt->errNo = XML_PARSER_EOF;
12302 	    xmlHaltParser(ctxt);
12303 	    return (XML_PARSER_EOF);
12304 	}
12305 #ifdef DEBUG_PUSH
12306 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12307 #endif
12308 
12309     } else if (ctxt->instate != XML_PARSER_EOF) {
12310 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12311 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12312 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12313 		    (in->raw != NULL)) {
12314 		int nbchars;
12315 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12316 		size_t current = ctxt->input->cur - ctxt->input->base;
12317 
12318 		nbchars = xmlCharEncInput(in, terminate);
12319 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12320 		if (nbchars < 0) {
12321 		    /* TODO 2.6.0 */
12322 		    xmlGenericError(xmlGenericErrorContext,
12323 				    "xmlParseChunk: encoder error\n");
12324                     xmlHaltParser(ctxt);
12325 		    return(XML_ERR_INVALID_ENCODING);
12326 		}
12327 	    }
12328 	}
12329     }
12330     if (remain != 0) {
12331         xmlParseTryOrFinish(ctxt, 0);
12332     } else {
12333         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12334             avail = xmlBufUse(ctxt->input->buf->buffer);
12335         /*
12336          * Depending on the current state it may not be such
12337          * a good idea to try parsing if there is nothing in the chunk
12338          * which would be worth doing a parser state transition and we
12339          * need to wait for more data
12340          */
12341         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12342             (old_avail == 0) || (avail == 0) ||
12343             (xmlParseCheckTransition(ctxt,
12344                        (const char *)&ctxt->input->base[old_avail],
12345                                      avail - old_avail)))
12346             xmlParseTryOrFinish(ctxt, terminate);
12347     }
12348     if (ctxt->instate == XML_PARSER_EOF)
12349         return(ctxt->errNo);
12350 
12351     if ((ctxt->input != NULL) &&
12352          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12353          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12354         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12355         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12356         xmlHaltParser(ctxt);
12357     }
12358     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12359         return(ctxt->errNo);
12360 
12361     if (remain != 0) {
12362         chunk += size;
12363         size = remain;
12364         remain = 0;
12365         goto xmldecl_done;
12366     }
12367     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12368         (ctxt->input->buf != NULL)) {
12369 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12370 					 ctxt->input);
12371 	size_t current = ctxt->input->cur - ctxt->input->base;
12372 
12373 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12374 
12375 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12376 			      base, current);
12377     }
12378     if (terminate) {
12379 	/*
12380 	 * Check for termination
12381 	 */
12382 	int cur_avail = 0;
12383 
12384 	if (ctxt->input != NULL) {
12385 	    if (ctxt->input->buf == NULL)
12386 		cur_avail = ctxt->input->length -
12387 			    (ctxt->input->cur - ctxt->input->base);
12388 	    else
12389 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12390 			              (ctxt->input->cur - ctxt->input->base);
12391 	}
12392 
12393 	if ((ctxt->instate != XML_PARSER_EOF) &&
12394 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12395 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12396 	}
12397 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12398 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12399 	}
12400 	if (ctxt->instate != XML_PARSER_EOF) {
12401 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12402 		ctxt->sax->endDocument(ctxt->userData);
12403 	}
12404 	ctxt->instate = XML_PARSER_EOF;
12405     }
12406     if (ctxt->wellFormed == 0)
12407 	return((xmlParserErrors) ctxt->errNo);
12408     else
12409         return(0);
12410 }
12411 
12412 /************************************************************************
12413  *									*
12414  *		I/O front end functions to the parser			*
12415  *									*
12416  ************************************************************************/
12417 
12418 /**
12419  * xmlCreatePushParserCtxt:
12420  * @sax:  a SAX handler
12421  * @user_data:  The user data returned on SAX callbacks
12422  * @chunk:  a pointer to an array of chars
12423  * @size:  number of chars in the array
12424  * @filename:  an optional file name or URI
12425  *
12426  * Create a parser context for using the XML parser in push mode.
12427  * If @buffer and @size are non-NULL, the data is used to detect
12428  * the encoding.  The remaining characters will be parsed so they
12429  * don't need to be fed in again through xmlParseChunk.
12430  * To allow content encoding detection, @size should be >= 4
12431  * The value of @filename is used for fetching external entities
12432  * and error/warning reports.
12433  *
12434  * Returns the new parser context or NULL
12435  */
12436 
12437 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12438 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12439                         const char *chunk, int size, const char *filename) {
12440     xmlParserCtxtPtr ctxt;
12441     xmlParserInputPtr inputStream;
12442     xmlParserInputBufferPtr buf;
12443     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12444 
12445     /*
12446      * plug some encoding conversion routines
12447      */
12448     if ((chunk != NULL) && (size >= 4))
12449 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12450 
12451     buf = xmlAllocParserInputBuffer(enc);
12452     if (buf == NULL) return(NULL);
12453 
12454     ctxt = xmlNewParserCtxt();
12455     if (ctxt == NULL) {
12456         xmlErrMemory(NULL, "creating parser: out of memory\n");
12457 	xmlFreeParserInputBuffer(buf);
12458 	return(NULL);
12459     }
12460     ctxt->dictNames = 1;
12461     if (sax != NULL) {
12462 #ifdef LIBXML_SAX1_ENABLED
12463 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12464 #endif /* LIBXML_SAX1_ENABLED */
12465 	    xmlFree(ctxt->sax);
12466 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12467 	if (ctxt->sax == NULL) {
12468 	    xmlErrMemory(ctxt, NULL);
12469 	    xmlFreeParserInputBuffer(buf);
12470 	    xmlFreeParserCtxt(ctxt);
12471 	    return(NULL);
12472 	}
12473 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12474 	if (sax->initialized == XML_SAX2_MAGIC)
12475 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12476 	else
12477 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12478 	if (user_data != NULL)
12479 	    ctxt->userData = user_data;
12480     }
12481     if (filename == NULL) {
12482 	ctxt->directory = NULL;
12483     } else {
12484         ctxt->directory = xmlParserGetDirectory(filename);
12485     }
12486 
12487     inputStream = xmlNewInputStream(ctxt);
12488     if (inputStream == NULL) {
12489 	xmlFreeParserCtxt(ctxt);
12490 	xmlFreeParserInputBuffer(buf);
12491 	return(NULL);
12492     }
12493 
12494     if (filename == NULL)
12495 	inputStream->filename = NULL;
12496     else {
12497 	inputStream->filename = (char *)
12498 	    xmlCanonicPath((const xmlChar *) filename);
12499 	if (inputStream->filename == NULL) {
12500 	    xmlFreeParserCtxt(ctxt);
12501 	    xmlFreeParserInputBuffer(buf);
12502 	    return(NULL);
12503 	}
12504     }
12505     inputStream->buf = buf;
12506     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12507     inputPush(ctxt, inputStream);
12508 
12509     /*
12510      * If the caller didn't provide an initial 'chunk' for determining
12511      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12512      * that it can be automatically determined later
12513      */
12514     if ((size == 0) || (chunk == NULL)) {
12515 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12516     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12517 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12518 	size_t cur = ctxt->input->cur - ctxt->input->base;
12519 
12520 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12521 
12522         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12523 #ifdef DEBUG_PUSH
12524 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12525 #endif
12526     }
12527 
12528     if (enc != XML_CHAR_ENCODING_NONE) {
12529         xmlSwitchEncoding(ctxt, enc);
12530     }
12531 
12532     return(ctxt);
12533 }
12534 #endif /* LIBXML_PUSH_ENABLED */
12535 
12536 /**
12537  * xmlHaltParser:
12538  * @ctxt:  an XML parser context
12539  *
12540  * Blocks further parser processing don't override error
12541  * for internal use
12542  */
12543 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12544 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12545     if (ctxt == NULL)
12546         return;
12547     ctxt->instate = XML_PARSER_EOF;
12548     ctxt->disableSAX = 1;
12549     while (ctxt->inputNr > 1)
12550         xmlFreeInputStream(inputPop(ctxt));
12551     if (ctxt->input != NULL) {
12552         /*
12553 	 * in case there was a specific allocation deallocate before
12554 	 * overriding base
12555 	 */
12556         if (ctxt->input->free != NULL) {
12557 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12558 	    ctxt->input->free = NULL;
12559 	}
12560         if (ctxt->input->buf != NULL) {
12561             xmlFreeParserInputBuffer(ctxt->input->buf);
12562             ctxt->input->buf = NULL;
12563         }
12564 	ctxt->input->cur = BAD_CAST"";
12565         ctxt->input->length = 0;
12566 	ctxt->input->base = ctxt->input->cur;
12567         ctxt->input->end = ctxt->input->cur;
12568     }
12569 }
12570 
12571 /**
12572  * xmlStopParser:
12573  * @ctxt:  an XML parser context
12574  *
12575  * Blocks further parser processing
12576  */
12577 void
xmlStopParser(xmlParserCtxtPtr ctxt)12578 xmlStopParser(xmlParserCtxtPtr ctxt) {
12579     if (ctxt == NULL)
12580         return;
12581     xmlHaltParser(ctxt);
12582     ctxt->errNo = XML_ERR_USER_STOP;
12583 }
12584 
12585 /**
12586  * xmlCreateIOParserCtxt:
12587  * @sax:  a SAX handler
12588  * @user_data:  The user data returned on SAX callbacks
12589  * @ioread:  an I/O read function
12590  * @ioclose:  an I/O close function
12591  * @ioctx:  an I/O handler
12592  * @enc:  the charset encoding if known
12593  *
12594  * Create a parser context for using the XML parser with an existing
12595  * I/O stream
12596  *
12597  * Returns the new parser context or NULL
12598  */
12599 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12600 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12601 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12602 	void *ioctx, xmlCharEncoding enc) {
12603     xmlParserCtxtPtr ctxt;
12604     xmlParserInputPtr inputStream;
12605     xmlParserInputBufferPtr buf;
12606 
12607     if (ioread == NULL) return(NULL);
12608 
12609     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12610     if (buf == NULL) {
12611         if (ioclose != NULL)
12612             ioclose(ioctx);
12613         return (NULL);
12614     }
12615 
12616     ctxt = xmlNewParserCtxt();
12617     if (ctxt == NULL) {
12618 	xmlFreeParserInputBuffer(buf);
12619 	return(NULL);
12620     }
12621     if (sax != NULL) {
12622 #ifdef LIBXML_SAX1_ENABLED
12623 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12624 #endif /* LIBXML_SAX1_ENABLED */
12625 	    xmlFree(ctxt->sax);
12626 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12627 	if (ctxt->sax == NULL) {
12628 	    xmlErrMemory(ctxt, NULL);
12629 	    xmlFreeParserCtxt(ctxt);
12630 	    return(NULL);
12631 	}
12632 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12633 	if (sax->initialized == XML_SAX2_MAGIC)
12634 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12635 	else
12636 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12637 	if (user_data != NULL)
12638 	    ctxt->userData = user_data;
12639     }
12640 
12641     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12642     if (inputStream == NULL) {
12643 	xmlFreeParserCtxt(ctxt);
12644 	return(NULL);
12645     }
12646     inputPush(ctxt, inputStream);
12647 
12648     return(ctxt);
12649 }
12650 
12651 #ifdef LIBXML_VALID_ENABLED
12652 /************************************************************************
12653  *									*
12654  *		Front ends when parsing a DTD				*
12655  *									*
12656  ************************************************************************/
12657 
12658 /**
12659  * xmlIOParseDTD:
12660  * @sax:  the SAX handler block or NULL
12661  * @input:  an Input Buffer
12662  * @enc:  the charset encoding if known
12663  *
12664  * Load and parse a DTD
12665  *
12666  * Returns the resulting xmlDtdPtr or NULL in case of error.
12667  * @input will be freed by the function in any case.
12668  */
12669 
12670 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12671 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12672 	      xmlCharEncoding enc) {
12673     xmlDtdPtr ret = NULL;
12674     xmlParserCtxtPtr ctxt;
12675     xmlParserInputPtr pinput = NULL;
12676     xmlChar start[4];
12677 
12678     if (input == NULL)
12679 	return(NULL);
12680 
12681     ctxt = xmlNewParserCtxt();
12682     if (ctxt == NULL) {
12683         xmlFreeParserInputBuffer(input);
12684 	return(NULL);
12685     }
12686 
12687     /* We are loading a DTD */
12688     ctxt->options |= XML_PARSE_DTDLOAD;
12689 
12690     /*
12691      * Set-up the SAX context
12692      */
12693     if (sax != NULL) {
12694 	if (ctxt->sax != NULL)
12695 	    xmlFree(ctxt->sax);
12696         ctxt->sax = sax;
12697         ctxt->userData = ctxt;
12698     }
12699     xmlDetectSAX2(ctxt);
12700 
12701     /*
12702      * generate a parser input from the I/O handler
12703      */
12704 
12705     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12706     if (pinput == NULL) {
12707         if (sax != NULL) ctxt->sax = NULL;
12708         xmlFreeParserInputBuffer(input);
12709 	xmlFreeParserCtxt(ctxt);
12710 	return(NULL);
12711     }
12712 
12713     /*
12714      * plug some encoding conversion routines here.
12715      */
12716     if (xmlPushInput(ctxt, pinput) < 0) {
12717         if (sax != NULL) ctxt->sax = NULL;
12718 	xmlFreeParserCtxt(ctxt);
12719 	return(NULL);
12720     }
12721     if (enc != XML_CHAR_ENCODING_NONE) {
12722         xmlSwitchEncoding(ctxt, enc);
12723     }
12724 
12725     pinput->filename = NULL;
12726     pinput->line = 1;
12727     pinput->col = 1;
12728     pinput->base = ctxt->input->cur;
12729     pinput->cur = ctxt->input->cur;
12730     pinput->free = NULL;
12731 
12732     /*
12733      * let's parse that entity knowing it's an external subset.
12734      */
12735     ctxt->inSubset = 2;
12736     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12737     if (ctxt->myDoc == NULL) {
12738 	xmlErrMemory(ctxt, "New Doc failed");
12739 	return(NULL);
12740     }
12741     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12742     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12743 	                               BAD_CAST "none", BAD_CAST "none");
12744 
12745     if ((enc == XML_CHAR_ENCODING_NONE) &&
12746         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12747 	/*
12748 	 * Get the 4 first bytes and decode the charset
12749 	 * if enc != XML_CHAR_ENCODING_NONE
12750 	 * plug some encoding conversion routines.
12751 	 */
12752 	start[0] = RAW;
12753 	start[1] = NXT(1);
12754 	start[2] = NXT(2);
12755 	start[3] = NXT(3);
12756 	enc = xmlDetectCharEncoding(start, 4);
12757 	if (enc != XML_CHAR_ENCODING_NONE) {
12758 	    xmlSwitchEncoding(ctxt, enc);
12759 	}
12760     }
12761 
12762     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12763 
12764     if (ctxt->myDoc != NULL) {
12765 	if (ctxt->wellFormed) {
12766 	    ret = ctxt->myDoc->extSubset;
12767 	    ctxt->myDoc->extSubset = NULL;
12768 	    if (ret != NULL) {
12769 		xmlNodePtr tmp;
12770 
12771 		ret->doc = NULL;
12772 		tmp = ret->children;
12773 		while (tmp != NULL) {
12774 		    tmp->doc = NULL;
12775 		    tmp = tmp->next;
12776 		}
12777 	    }
12778 	} else {
12779 	    ret = NULL;
12780 	}
12781         xmlFreeDoc(ctxt->myDoc);
12782         ctxt->myDoc = NULL;
12783     }
12784     if (sax != NULL) ctxt->sax = NULL;
12785     xmlFreeParserCtxt(ctxt);
12786 
12787     return(ret);
12788 }
12789 
12790 /**
12791  * xmlSAXParseDTD:
12792  * @sax:  the SAX handler block
12793  * @ExternalID:  a NAME* containing the External ID of the DTD
12794  * @SystemID:  a NAME* containing the URL to the DTD
12795  *
12796  * Load and parse an external subset.
12797  *
12798  * Returns the resulting xmlDtdPtr or NULL in case of error.
12799  */
12800 
12801 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12802 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12803                           const xmlChar *SystemID) {
12804     xmlDtdPtr ret = NULL;
12805     xmlParserCtxtPtr ctxt;
12806     xmlParserInputPtr input = NULL;
12807     xmlCharEncoding enc;
12808     xmlChar* systemIdCanonic;
12809 
12810     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12811 
12812     ctxt = xmlNewParserCtxt();
12813     if (ctxt == NULL) {
12814 	return(NULL);
12815     }
12816 
12817     /* We are loading a DTD */
12818     ctxt->options |= XML_PARSE_DTDLOAD;
12819 
12820     /*
12821      * Set-up the SAX context
12822      */
12823     if (sax != NULL) {
12824 	if (ctxt->sax != NULL)
12825 	    xmlFree(ctxt->sax);
12826         ctxt->sax = sax;
12827         ctxt->userData = ctxt;
12828     }
12829 
12830     /*
12831      * Canonicalise the system ID
12832      */
12833     systemIdCanonic = xmlCanonicPath(SystemID);
12834     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12835 	xmlFreeParserCtxt(ctxt);
12836 	return(NULL);
12837     }
12838 
12839     /*
12840      * Ask the Entity resolver to load the damn thing
12841      */
12842 
12843     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12844 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12845 	                                 systemIdCanonic);
12846     if (input == NULL) {
12847         if (sax != NULL) ctxt->sax = NULL;
12848 	xmlFreeParserCtxt(ctxt);
12849 	if (systemIdCanonic != NULL)
12850 	    xmlFree(systemIdCanonic);
12851 	return(NULL);
12852     }
12853 
12854     /*
12855      * plug some encoding conversion routines here.
12856      */
12857     if (xmlPushInput(ctxt, input) < 0) {
12858         if (sax != NULL) ctxt->sax = NULL;
12859 	xmlFreeParserCtxt(ctxt);
12860 	if (systemIdCanonic != NULL)
12861 	    xmlFree(systemIdCanonic);
12862 	return(NULL);
12863     }
12864     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12865 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12866 	xmlSwitchEncoding(ctxt, enc);
12867     }
12868 
12869     if (input->filename == NULL)
12870 	input->filename = (char *) systemIdCanonic;
12871     else
12872 	xmlFree(systemIdCanonic);
12873     input->line = 1;
12874     input->col = 1;
12875     input->base = ctxt->input->cur;
12876     input->cur = ctxt->input->cur;
12877     input->free = NULL;
12878 
12879     /*
12880      * let's parse that entity knowing it's an external subset.
12881      */
12882     ctxt->inSubset = 2;
12883     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12884     if (ctxt->myDoc == NULL) {
12885 	xmlErrMemory(ctxt, "New Doc failed");
12886         if (sax != NULL) ctxt->sax = NULL;
12887 	xmlFreeParserCtxt(ctxt);
12888 	return(NULL);
12889     }
12890     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12891     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 	                               ExternalID, SystemID);
12893     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12894 
12895     if (ctxt->myDoc != NULL) {
12896 	if (ctxt->wellFormed) {
12897 	    ret = ctxt->myDoc->extSubset;
12898 	    ctxt->myDoc->extSubset = NULL;
12899 	    if (ret != NULL) {
12900 		xmlNodePtr tmp;
12901 
12902 		ret->doc = NULL;
12903 		tmp = ret->children;
12904 		while (tmp != NULL) {
12905 		    tmp->doc = NULL;
12906 		    tmp = tmp->next;
12907 		}
12908 	    }
12909 	} else {
12910 	    ret = NULL;
12911 	}
12912         xmlFreeDoc(ctxt->myDoc);
12913         ctxt->myDoc = NULL;
12914     }
12915     if (sax != NULL) ctxt->sax = NULL;
12916     xmlFreeParserCtxt(ctxt);
12917 
12918     return(ret);
12919 }
12920 
12921 
12922 /**
12923  * xmlParseDTD:
12924  * @ExternalID:  a NAME* containing the External ID of the DTD
12925  * @SystemID:  a NAME* containing the URL to the DTD
12926  *
12927  * Load and parse an external subset.
12928  *
12929  * Returns the resulting xmlDtdPtr or NULL in case of error.
12930  */
12931 
12932 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12933 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12934     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12935 }
12936 #endif /* LIBXML_VALID_ENABLED */
12937 
12938 /************************************************************************
12939  *									*
12940  *		Front ends when parsing an Entity			*
12941  *									*
12942  ************************************************************************/
12943 
12944 /**
12945  * xmlParseCtxtExternalEntity:
12946  * @ctx:  the existing parsing context
12947  * @URL:  the URL for the entity to load
12948  * @ID:  the System ID for the entity to load
12949  * @lst:  the return value for the set of parsed nodes
12950  *
12951  * Parse an external general entity within an existing parsing context
12952  * An external general parsed entity is well-formed if it matches the
12953  * production labeled extParsedEnt.
12954  *
12955  * [78] extParsedEnt ::= TextDecl? content
12956  *
12957  * Returns 0 if the entity is well formed, -1 in case of args problem and
12958  *    the parser error code otherwise
12959  */
12960 
12961 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12962 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12963 	               const xmlChar *ID, xmlNodePtr *lst) {
12964     void *userData;
12965 
12966     if (ctx == NULL) return(-1);
12967     /*
12968      * If the user provided their own SAX callbacks, then reuse the
12969      * userData callback field, otherwise the expected setup in a
12970      * DOM builder is to have userData == ctxt
12971      */
12972     if (ctx->userData == ctx)
12973         userData = NULL;
12974     else
12975         userData = ctx->userData;
12976     return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12977                                          userData, ctx->depth + 1,
12978                                          URL, ID, lst);
12979 }
12980 
12981 /**
12982  * xmlParseExternalEntityPrivate:
12983  * @doc:  the document the chunk pertains to
12984  * @oldctxt:  the previous parser context if available
12985  * @sax:  the SAX handler block (possibly NULL)
12986  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12987  * @depth:  Used for loop detection, use 0
12988  * @URL:  the URL for the entity to load
12989  * @ID:  the System ID for the entity to load
12990  * @list:  the return value for the set of parsed nodes
12991  *
12992  * Private version of xmlParseExternalEntity()
12993  *
12994  * Returns 0 if the entity is well formed, -1 in case of args problem and
12995  *    the parser error code otherwise
12996  */
12997 
12998 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12999 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13000 	              xmlSAXHandlerPtr sax,
13001 		      void *user_data, int depth, const xmlChar *URL,
13002 		      const xmlChar *ID, xmlNodePtr *list) {
13003     xmlParserCtxtPtr ctxt;
13004     xmlDocPtr newDoc;
13005     xmlNodePtr newRoot;
13006     xmlSAXHandlerPtr oldsax = NULL;
13007     xmlParserErrors ret = XML_ERR_OK;
13008     xmlChar start[4];
13009     xmlCharEncoding enc;
13010 
13011     if (((depth > 40) &&
13012 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13013 	(depth > 1024)) {
13014 	return(XML_ERR_ENTITY_LOOP);
13015     }
13016 
13017     if (list != NULL)
13018         *list = NULL;
13019     if ((URL == NULL) && (ID == NULL))
13020 	return(XML_ERR_INTERNAL_ERROR);
13021     if (doc == NULL)
13022 	return(XML_ERR_INTERNAL_ERROR);
13023 
13024 
13025     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13026     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13027     ctxt->userData = ctxt;
13028     if (sax != NULL) {
13029 	oldsax = ctxt->sax;
13030         ctxt->sax = sax;
13031 	if (user_data != NULL)
13032 	    ctxt->userData = user_data;
13033     }
13034     xmlDetectSAX2(ctxt);
13035     newDoc = xmlNewDoc(BAD_CAST "1.0");
13036     if (newDoc == NULL) {
13037 	xmlFreeParserCtxt(ctxt);
13038 	return(XML_ERR_INTERNAL_ERROR);
13039     }
13040     newDoc->properties = XML_DOC_INTERNAL;
13041     if (doc) {
13042         newDoc->intSubset = doc->intSubset;
13043         newDoc->extSubset = doc->extSubset;
13044         if (doc->dict) {
13045             newDoc->dict = doc->dict;
13046             xmlDictReference(newDoc->dict);
13047         }
13048         if (doc->URL != NULL) {
13049             newDoc->URL = xmlStrdup(doc->URL);
13050         }
13051     }
13052     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13053     if (newRoot == NULL) {
13054 	if (sax != NULL)
13055 	    ctxt->sax = oldsax;
13056 	xmlFreeParserCtxt(ctxt);
13057 	newDoc->intSubset = NULL;
13058 	newDoc->extSubset = NULL;
13059         xmlFreeDoc(newDoc);
13060 	return(XML_ERR_INTERNAL_ERROR);
13061     }
13062     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13063     nodePush(ctxt, newDoc->children);
13064     if (doc == NULL) {
13065         ctxt->myDoc = newDoc;
13066     } else {
13067         ctxt->myDoc = doc;
13068         newRoot->doc = doc;
13069     }
13070 
13071     /*
13072      * Get the 4 first bytes and decode the charset
13073      * if enc != XML_CHAR_ENCODING_NONE
13074      * plug some encoding conversion routines.
13075      */
13076     GROW;
13077     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13078 	start[0] = RAW;
13079 	start[1] = NXT(1);
13080 	start[2] = NXT(2);
13081 	start[3] = NXT(3);
13082 	enc = xmlDetectCharEncoding(start, 4);
13083 	if (enc != XML_CHAR_ENCODING_NONE) {
13084 	    xmlSwitchEncoding(ctxt, enc);
13085 	}
13086     }
13087 
13088     /*
13089      * Parse a possible text declaration first
13090      */
13091     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13092 	xmlParseTextDecl(ctxt);
13093         /*
13094          * An XML-1.0 document can't reference an entity not XML-1.0
13095          */
13096         if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13097             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13098             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13099                            "Version mismatch between document and entity\n");
13100         }
13101     }
13102 
13103     ctxt->instate = XML_PARSER_CONTENT;
13104     ctxt->depth = depth;
13105     if (oldctxt != NULL) {
13106 	ctxt->_private = oldctxt->_private;
13107 	ctxt->loadsubset = oldctxt->loadsubset;
13108 	ctxt->validate = oldctxt->validate;
13109 	ctxt->valid = oldctxt->valid;
13110 	ctxt->replaceEntities = oldctxt->replaceEntities;
13111         if (oldctxt->validate) {
13112             ctxt->vctxt.error = oldctxt->vctxt.error;
13113             ctxt->vctxt.warning = oldctxt->vctxt.warning;
13114             ctxt->vctxt.userData = oldctxt->vctxt.userData;
13115         }
13116 	ctxt->external = oldctxt->external;
13117         if (ctxt->dict) xmlDictFree(ctxt->dict);
13118         ctxt->dict = oldctxt->dict;
13119         ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13120         ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13121         ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13122         ctxt->dictNames = oldctxt->dictNames;
13123         ctxt->attsDefault = oldctxt->attsDefault;
13124         ctxt->attsSpecial = oldctxt->attsSpecial;
13125         ctxt->linenumbers = oldctxt->linenumbers;
13126 	ctxt->record_info = oldctxt->record_info;
13127 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13128 	ctxt->node_seq.length = oldctxt->node_seq.length;
13129 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13130     } else {
13131 	/*
13132 	 * Doing validity checking on chunk without context
13133 	 * doesn't make sense
13134 	 */
13135 	ctxt->_private = NULL;
13136 	ctxt->validate = 0;
13137 	ctxt->external = 2;
13138 	ctxt->loadsubset = 0;
13139     }
13140 
13141     xmlParseContent(ctxt);
13142 
13143     if ((RAW == '<') && (NXT(1) == '/')) {
13144 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13145     } else if (RAW != 0) {
13146 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13147     }
13148     if (ctxt->node != newDoc->children) {
13149 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150     }
13151 
13152     if (!ctxt->wellFormed) {
13153         if (ctxt->errNo == 0)
13154 	    ret = XML_ERR_INTERNAL_ERROR;
13155 	else
13156 	    ret = (xmlParserErrors)ctxt->errNo;
13157     } else {
13158 	if (list != NULL) {
13159 	    xmlNodePtr cur;
13160 
13161 	    /*
13162 	     * Return the newly created nodeset after unlinking it from
13163 	     * they pseudo parent.
13164 	     */
13165 	    cur = newDoc->children->children;
13166 	    *list = cur;
13167 	    while (cur != NULL) {
13168 		cur->parent = NULL;
13169 		cur = cur->next;
13170 	    }
13171             newDoc->children->children = NULL;
13172 	}
13173 	ret = XML_ERR_OK;
13174     }
13175 
13176     /*
13177      * Record in the parent context the number of entities replacement
13178      * done when parsing that reference.
13179      */
13180     if (oldctxt != NULL)
13181         oldctxt->nbentities += ctxt->nbentities;
13182 
13183     /*
13184      * Also record the size of the entity parsed
13185      */
13186     if (ctxt->input != NULL && oldctxt != NULL) {
13187 	oldctxt->sizeentities += ctxt->input->consumed;
13188 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13189     }
13190     /*
13191      * And record the last error if any
13192      */
13193     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13194         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13195 
13196     if (sax != NULL)
13197 	ctxt->sax = oldsax;
13198     if (oldctxt != NULL) {
13199         ctxt->dict = NULL;
13200         ctxt->attsDefault = NULL;
13201         ctxt->attsSpecial = NULL;
13202         oldctxt->validate = ctxt->validate;
13203         oldctxt->valid = ctxt->valid;
13204         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13205         oldctxt->node_seq.length = ctxt->node_seq.length;
13206         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13207     }
13208     ctxt->node_seq.maximum = 0;
13209     ctxt->node_seq.length = 0;
13210     ctxt->node_seq.buffer = NULL;
13211     xmlFreeParserCtxt(ctxt);
13212     newDoc->intSubset = NULL;
13213     newDoc->extSubset = NULL;
13214     xmlFreeDoc(newDoc);
13215 
13216     return(ret);
13217 }
13218 
13219 #ifdef LIBXML_SAX1_ENABLED
/**
 * xmlParseExternalEntity:
 * @doc:  the document the chunk pertains to
 * @sax:  the SAX handler block (possibly NULL)
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
 * @depth:  Used for loop detection, use 0
 * @URL:  the URL for the entity to load
 * @ID:  the System ID for the entity to load
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse an external general entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * This is a thin public wrapper: every argument is forwarded unchanged to
 * xmlParseExternalEntityPrivate(), with NULL supplied as that helper's
 * second argument.
 *
 * Returns 0 if the entity is well formed, otherwise the parser error code
 *    returned by xmlParseExternalEntityPrivate().
 *    NOTE(review): earlier documentation promised -1 for argument problems,
 *    but the helper appears to report those as XML_ERR_INTERNAL_ERROR;
 *    confirm against the helper before relying on -1.
 */

int
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
    /*
     * NULL presumably stands for "no parent parser context" (inferred from
     * the helper's use of an oldctxt variable; its signature is not in view).
     */
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
		                       ID, lst));
}
13246 
/**
 * xmlParseBalancedChunkMemory:
 * @doc:  the document the chunk pertains to (must not be NULL)
 * @sax:  the SAX handler block (possibly NULL)
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
 * @depth:  Used for loop detection, use 0
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Equivalent to calling xmlParseBalancedChunkMemoryRecover() with the
 * recover argument set to 0, i.e. no nodes are handed back through @lst
 * when the chunk is not well balanced.
 *
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
 *    the parser error code otherwise
 */

int
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
    /* Last argument is "recover": 0 disables returning partial results. */
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
                                                depth, string, lst, 0 );
}
13273 #endif /* LIBXML_SAX1_ENABLED */
13274 
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context (must not be NULL, it is
 *            dereferenced unconditionally)
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed by a temporary sub-context created over @string.
 * That sub-context borrows the dictionary, the SAX handler, the document
 * (when @oldctxt has one) and the attribute default/special tables of
 * @oldctxt. Every borrowed pointer is detached again before the
 * sub-context is freed. The entity count and the last error of the
 * sub-context are propagated back into @oldctxt.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise. Specifically: XML_ERR_ENTITY_LOOP when the depth
 * of @oldctxt exceeds 40 (1024 with XML_PARSE_HUGE), XML_ERR_INTERNAL_ERROR
 * when @string is NULL or a document/node allocation fails, and
 * XML_WAR_UNDECLARED_ENTITY when the sub-context cannot be created.
 *
 * The node list is handed back through @lst only when the result is
 * XML_ERR_OK; otherwise the parsed nodes are freed. (This function has no
 * recover mode.)
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* Guard against runaway entity nesting. */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth >  1024)) {
	return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
	ctxt->userData = user_data;
    else
	ctxt->userData = ctxt;
    /*
     * Replace the sub-context's own dictionary by the parent's one. No
     * reference is taken here, which is why ctxt->dict is reset to NULL
     * on every path before the sub-context is freed.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /*
     * Borrow the parent's SAX handler; the sub-context's own handler is
     * kept in oldsax and put back before the sub-context is freed.
     */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
	/* No document in the parent: build a scratch one, freed on exit. */
	newDoc = xmlNewDoc(BAD_CAST "1.0");
	if (newDoc == NULL) {
	    ctxt->sax = oldsax;
	    ctxt->dict = NULL;
	    xmlFreeParserCtxt(ctxt);
	    return(XML_ERR_INTERNAL_ERROR);
	}
	newDoc->properties = XML_DOC_INTERNAL;
	newDoc->dict = ctxt->dict;
	xmlDictReference(newDoc->dict);
	ctxt->myDoc = newDoc;
    } else {
	/*
	 * Parse inside the parent's document; remember its child list so
	 * that it can be restored once the pseudo root has been removed.
	 */
	ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
	last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
	ctxt->sax = oldsax;
	ctxt->dict = NULL;
	xmlFreeParserCtxt(ctxt);
	if (newDoc != NULL) {
	    xmlFreeDoc(newDoc);
	}
	return(XML_ERR_INTERNAL_ERROR);
    }
    /* Temporarily make the pseudo root the only child of the document. */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
	/*
	 * ID/IDREF registration will be done in xmlValidateElement below
	 */
	ctxt->loadsubset |= XML_SKIP_IDS;
    }
    /* Borrowed tables; detached again before the sub-context is freed. */
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    /* Anything left in the input means the chunk was not balanced. */
    if ((RAW == '<') && (NXT(1) == '/')) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* The node stack must be back at the pseudo root. */
    if (ctxt->node != ctxt->myDoc->children) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
	    ret = XML_ERR_INTERNAL_ERROR;
	else
	    ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
	xmlNodePtr cur;

	/*
	 * Return the newly created nodeset after unlinking it from
	 * the pseudo parent.
	 */
	cur = ctxt->myDoc->children->children;
	*lst = cur;
	while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
	    /* Validate each top-level element against the parent's DTD. */
	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
		(cur->type == XML_ELEMENT_NODE)) {
		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
			oldctxt->myDoc, cur);
	    }
#endif /* LIBXML_VALID_ENABLED */
	    cur->parent = NULL;
	    cur = cur->next;
	}
	ctxt->myDoc->children->children = NULL;
    }
    /*
     * Drop the pseudo root (together with any nodes still attached to it)
     * and restore the document's original child list.
     * NOTE(review): ctxt->myDoc was already dereferenced above, so this
     * NULL test cannot fail here.
     */
    if (ctxt->myDoc != NULL) {
	xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     * NOTE(review): oldctxt was dereferenced unconditionally above, so
     * this NULL test is redundant.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* Detach everything borrowed from oldctxt before freeing. */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
	xmlFreeDoc(newDoc);
    }

    return(ret);
}
13469 
13470 /**
13471  * xmlParseInNodeContext:
13472  * @node:  the context node
13473  * @data:  the input string
13474  * @datalen:  the input string length in bytes
13475  * @options:  a combination of xmlParserOption
13476  * @lst:  the return value for the set of parsed nodes
13477  *
13478  * Parse a well-balanced chunk of an XML document
13479  * within the context (DTD, namespaces, etc ...) of the given node.
13480  *
13481  * The allowed sequence for the data is a Well Balanced Chunk defined by
13482  * the content production in the XML grammar:
13483  *
13484  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13485  *
13486  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13487  * error code otherwise
13488  */
13489 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13490 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13491                       int options, xmlNodePtr *lst) {
13492 #ifdef SAX2
13493     xmlParserCtxtPtr ctxt;
13494     xmlDocPtr doc = NULL;
13495     xmlNodePtr fake, cur;
13496     int nsnr = 0;
13497 
13498     xmlParserErrors ret = XML_ERR_OK;
13499 
13500     /*
13501      * check all input parameters, grab the document
13502      */
13503     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13504         return(XML_ERR_INTERNAL_ERROR);
13505     switch (node->type) {
13506         case XML_ELEMENT_NODE:
13507         case XML_ATTRIBUTE_NODE:
13508         case XML_TEXT_NODE:
13509         case XML_CDATA_SECTION_NODE:
13510         case XML_ENTITY_REF_NODE:
13511         case XML_PI_NODE:
13512         case XML_COMMENT_NODE:
13513         case XML_DOCUMENT_NODE:
13514         case XML_HTML_DOCUMENT_NODE:
13515 	    break;
13516 	default:
13517 	    return(XML_ERR_INTERNAL_ERROR);
13518 
13519     }
13520     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13521            (node->type != XML_DOCUMENT_NODE) &&
13522 	   (node->type != XML_HTML_DOCUMENT_NODE))
13523 	node = node->parent;
13524     if (node == NULL)
13525 	return(XML_ERR_INTERNAL_ERROR);
13526     if (node->type == XML_ELEMENT_NODE)
13527 	doc = node->doc;
13528     else
13529         doc = (xmlDocPtr) node;
13530     if (doc == NULL)
13531 	return(XML_ERR_INTERNAL_ERROR);
13532 
13533     /*
13534      * allocate a context and set-up everything not related to the
13535      * node position in the tree
13536      */
13537     if (doc->type == XML_DOCUMENT_NODE)
13538 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13539 #ifdef LIBXML_HTML_ENABLED
13540     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13541 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13542         /*
13543          * When parsing in context, it makes no sense to add implied
13544          * elements like html/body/etc...
13545          */
13546         options |= HTML_PARSE_NOIMPLIED;
13547     }
13548 #endif
13549     else
13550         return(XML_ERR_INTERNAL_ERROR);
13551 
13552     if (ctxt == NULL)
13553         return(XML_ERR_NO_MEMORY);
13554 
13555     /*
13556      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13557      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13558      * we must wait until the last moment to free the original one.
13559      */
13560     if (doc->dict != NULL) {
13561         if (ctxt->dict != NULL)
13562 	    xmlDictFree(ctxt->dict);
13563 	ctxt->dict = doc->dict;
13564     } else
13565         options |= XML_PARSE_NODICT;
13566 
13567     if (doc->encoding != NULL) {
13568         xmlCharEncodingHandlerPtr hdlr;
13569 
13570         if (ctxt->encoding != NULL)
13571 	    xmlFree((xmlChar *) ctxt->encoding);
13572         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13573 
13574         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13575         if (hdlr != NULL) {
13576             xmlSwitchToEncoding(ctxt, hdlr);
13577 	} else {
13578             return(XML_ERR_UNSUPPORTED_ENCODING);
13579         }
13580     }
13581 
13582     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13583     xmlDetectSAX2(ctxt);
13584     ctxt->myDoc = doc;
13585     /* parsing in context, i.e. as within existing content */
13586     ctxt->input_id = 2;
13587     ctxt->instate = XML_PARSER_CONTENT;
13588 
13589     fake = xmlNewComment(NULL);
13590     if (fake == NULL) {
13591         xmlFreeParserCtxt(ctxt);
13592 	return(XML_ERR_NO_MEMORY);
13593     }
13594     xmlAddChild(node, fake);
13595 
13596     if (node->type == XML_ELEMENT_NODE) {
13597 	nodePush(ctxt, node);
13598 	/*
13599 	 * initialize the SAX2 namespaces stack
13600 	 */
13601 	cur = node;
13602 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13603 	    xmlNsPtr ns = cur->nsDef;
13604 	    const xmlChar *iprefix, *ihref;
13605 
13606 	    while (ns != NULL) {
13607 		if (ctxt->dict) {
13608 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13609 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13610 		} else {
13611 		    iprefix = ns->prefix;
13612 		    ihref = ns->href;
13613 		}
13614 
13615 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13616 		    nsPush(ctxt, iprefix, ihref);
13617 		    nsnr++;
13618 		}
13619 		ns = ns->next;
13620 	    }
13621 	    cur = cur->parent;
13622 	}
13623     }
13624 
13625     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13626 	/*
13627 	 * ID/IDREF registration will be done in xmlValidateElement below
13628 	 */
13629 	ctxt->loadsubset |= XML_SKIP_IDS;
13630     }
13631 
13632 #ifdef LIBXML_HTML_ENABLED
13633     if (doc->type == XML_HTML_DOCUMENT_NODE)
13634         __htmlParseContent(ctxt);
13635     else
13636 #endif
13637 	xmlParseContent(ctxt);
13638 
13639     nsPop(ctxt, nsnr);
13640     if ((RAW == '<') && (NXT(1) == '/')) {
13641 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13642     } else if (RAW != 0) {
13643 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13644     }
13645     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13646 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13647 	ctxt->wellFormed = 0;
13648     }
13649 
13650     if (!ctxt->wellFormed) {
13651         if (ctxt->errNo == 0)
13652 	    ret = XML_ERR_INTERNAL_ERROR;
13653 	else
13654 	    ret = (xmlParserErrors)ctxt->errNo;
13655     } else {
13656         ret = XML_ERR_OK;
13657     }
13658 
13659     /*
13660      * Return the newly created nodeset after unlinking it from
13661      * the pseudo sibling.
13662      */
13663 
13664     cur = fake->next;
13665     fake->next = NULL;
13666     node->last = fake;
13667 
13668     if (cur != NULL) {
13669 	cur->prev = NULL;
13670     }
13671 
13672     *lst = cur;
13673 
13674     while (cur != NULL) {
13675 	cur->parent = NULL;
13676 	cur = cur->next;
13677     }
13678 
13679     xmlUnlinkNode(fake);
13680     xmlFreeNode(fake);
13681 
13682 
13683     if (ret != XML_ERR_OK) {
13684         xmlFreeNodeList(*lst);
13685 	*lst = NULL;
13686     }
13687 
13688     if (doc->dict != NULL)
13689         ctxt->dict = NULL;
13690     xmlFreeParserCtxt(ctxt);
13691 
13692     return(ret);
13693 #else /* !SAX2 */
13694     return(XML_ERR_INTERNAL_ERROR);
13695 #endif
13696 }
13697 
13698 #ifdef LIBXML_SAX1_ENABLED
13699 /**
13700  * xmlParseBalancedChunkMemoryRecover:
13701  * @doc:  the document the chunk pertains to (must not be NULL)
13702  * @sax:  the SAX handler block (possibly NULL)
13703  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13704  * @depth:  Used for loop detection, use 0
13705  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13706  * @lst:  the return value for the set of parsed nodes
13707  * @recover: return nodes even if the data is broken (use 0)
13708  *
13709  *
13710  * Parse a well-balanced chunk of an XML document
13711  * called by the parser
13712  * The allowed sequence for the Well Balanced Chunk is the one defined by
13713  * the content production in the XML grammar:
13714  *
13715  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13716  *
13717  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13718  *    the parser error code otherwise
13719  *
13720  * In case recover is set to 1, the nodelist will not be empty even if
13721  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13722  * some extent.
13723  */
13724 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13725 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13726      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13727      int recover) {
13728     xmlParserCtxtPtr ctxt;
13729     xmlDocPtr newDoc;
13730     xmlSAXHandlerPtr oldsax = NULL;
13731     xmlNodePtr content, newRoot;
13732     int size;
13733     int ret = 0;
13734 
13735     if (depth > 40) {
13736 	return(XML_ERR_ENTITY_LOOP);
13737     }
13738 
13739 
13740     if (lst != NULL)
13741         *lst = NULL;
13742     if (string == NULL)
13743         return(-1);
13744 
13745     size = xmlStrlen(string);
13746 
13747     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13748     if (ctxt == NULL) return(-1);
13749     ctxt->userData = ctxt;
13750     if (sax != NULL) {
13751 	oldsax = ctxt->sax;
13752         ctxt->sax = sax;
13753 	if (user_data != NULL)
13754 	    ctxt->userData = user_data;
13755     }
13756     newDoc = xmlNewDoc(BAD_CAST "1.0");
13757     if (newDoc == NULL) {
13758 	xmlFreeParserCtxt(ctxt);
13759 	return(-1);
13760     }
13761     newDoc->properties = XML_DOC_INTERNAL;
13762     if ((doc != NULL) && (doc->dict != NULL)) {
13763         xmlDictFree(ctxt->dict);
13764 	ctxt->dict = doc->dict;
13765 	xmlDictReference(ctxt->dict);
13766 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13767 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13768 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13769 	ctxt->dictNames = 1;
13770     } else {
13771 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13772     }
13773     /* doc == NULL is only supported for historic reasons */
13774     if (doc != NULL) {
13775 	newDoc->intSubset = doc->intSubset;
13776 	newDoc->extSubset = doc->extSubset;
13777     }
13778     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13779     if (newRoot == NULL) {
13780 	if (sax != NULL)
13781 	    ctxt->sax = oldsax;
13782 	xmlFreeParserCtxt(ctxt);
13783 	newDoc->intSubset = NULL;
13784 	newDoc->extSubset = NULL;
13785         xmlFreeDoc(newDoc);
13786 	return(-1);
13787     }
13788     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13789     nodePush(ctxt, newRoot);
13790     /* doc == NULL is only supported for historic reasons */
13791     if (doc == NULL) {
13792 	ctxt->myDoc = newDoc;
13793     } else {
13794 	ctxt->myDoc = newDoc;
13795 	newDoc->children->doc = doc;
13796 	/* Ensure that doc has XML spec namespace */
13797 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13798 	newDoc->oldNs = doc->oldNs;
13799     }
13800     ctxt->instate = XML_PARSER_CONTENT;
13801     ctxt->input_id = 2;
13802     ctxt->depth = depth;
13803 
13804     /*
13805      * Doing validity checking on chunk doesn't make sense
13806      */
13807     ctxt->validate = 0;
13808     ctxt->loadsubset = 0;
13809     xmlDetectSAX2(ctxt);
13810 
13811     if ( doc != NULL ){
13812         content = doc->children;
13813         doc->children = NULL;
13814         xmlParseContent(ctxt);
13815         doc->children = content;
13816     }
13817     else {
13818         xmlParseContent(ctxt);
13819     }
13820     if ((RAW == '<') && (NXT(1) == '/')) {
13821 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13822     } else if (RAW != 0) {
13823 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13824     }
13825     if (ctxt->node != newDoc->children) {
13826 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13827     }
13828 
13829     if (!ctxt->wellFormed) {
13830         if (ctxt->errNo == 0)
13831 	    ret = 1;
13832 	else
13833 	    ret = ctxt->errNo;
13834     } else {
13835       ret = 0;
13836     }
13837 
13838     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13839 	xmlNodePtr cur;
13840 
13841 	/*
13842 	 * Return the newly created nodeset after unlinking it from
13843 	 * they pseudo parent.
13844 	 */
13845 	cur = newDoc->children->children;
13846 	*lst = cur;
13847 	while (cur != NULL) {
13848 	    xmlSetTreeDoc(cur, doc);
13849 	    cur->parent = NULL;
13850 	    cur = cur->next;
13851 	}
13852 	newDoc->children->children = NULL;
13853     }
13854 
13855     if (sax != NULL)
13856 	ctxt->sax = oldsax;
13857     xmlFreeParserCtxt(ctxt);
13858     newDoc->intSubset = NULL;
13859     newDoc->extSubset = NULL;
13860     /* This leaks the namespace list if doc == NULL */
13861     newDoc->oldNs = NULL;
13862     xmlFreeDoc(newDoc);
13863 
13864     return(ret);
13865 }
13866 
13867 /**
13868  * xmlSAXParseEntity:
13869  * @sax:  the SAX handler block
13870  * @filename:  the filename
13871  *
13872  * parse an XML external entity out of context and build a tree.
13873  * It use the given SAX function block to handle the parsing callback.
13874  * If sax is NULL, fallback to the default DOM tree building routines.
13875  *
13876  * [78] extParsedEnt ::= TextDecl? content
13877  *
13878  * This correspond to a "Well Balanced" chunk
13879  *
13880  * Returns the resulting document tree
13881  */
13882 
13883 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13884 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13885     xmlDocPtr ret;
13886     xmlParserCtxtPtr ctxt;
13887 
13888     ctxt = xmlCreateFileParserCtxt(filename);
13889     if (ctxt == NULL) {
13890 	return(NULL);
13891     }
13892     if (sax != NULL) {
13893 	if (ctxt->sax != NULL)
13894 	    xmlFree(ctxt->sax);
13895         ctxt->sax = sax;
13896         ctxt->userData = NULL;
13897     }
13898 
13899     xmlParseExtParsedEnt(ctxt);
13900 
13901     if (ctxt->wellFormed)
13902 	ret = ctxt->myDoc;
13903     else {
13904         ret = NULL;
13905         xmlFreeDoc(ctxt->myDoc);
13906         ctxt->myDoc = NULL;
13907     }
13908     if (sax != NULL)
13909         ctxt->sax = NULL;
13910     xmlFreeParserCtxt(ctxt);
13911 
13912     return(ret);
13913 }
13914 
/**
 * xmlParseEntity:
 * @filename:  the filename
 *
 * Parse an XML external entity out of context and build a tree.
 * Convenience wrapper around xmlSAXParseEntity() called with a NULL
 * SAX handler block.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * This corresponds to a "Well Balanced" chunk
 *
 * Returns the resulting document tree, or NULL when xmlSAXParseEntity()
 * returns NULL
 */

xmlDocPtr
xmlParseEntity(const char *filename) {
    return(xmlSAXParseEntity(NULL, filename));
}
13932 #endif /* LIBXML_SAX1_ENABLED */
13933 
13934 /**
13935  * xmlCreateEntityParserCtxtInternal:
13936  * @URL:  the entity URL
13937  * @ID:  the entity PUBLIC ID
13938  * @base:  a possible base for the target URI
13939  * @pctx:  parser context used to set options on new context
13940  *
13941  * Create a parser context for an external entity
13942  * Automatic support for ZLIB/Compress compressed document is provided
13943  * by default if found at compile-time.
13944  *
13945  * Returns the new parser context or NULL
13946  */
13947 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13948 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13949 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13950     xmlParserCtxtPtr ctxt;
13951     xmlParserInputPtr inputStream;
13952     char *directory = NULL;
13953     xmlChar *uri;
13954 
13955     ctxt = xmlNewParserCtxt();
13956     if (ctxt == NULL) {
13957 	return(NULL);
13958     }
13959 
13960     if (pctx != NULL) {
13961         ctxt->options = pctx->options;
13962         ctxt->_private = pctx->_private;
13963 	/*
13964 	 * this is a subparser of pctx, so the input_id should be
13965 	 * incremented to distinguish from main entity
13966 	 */
13967 	ctxt->input_id = pctx->input_id + 1;
13968     }
13969 
13970     /* Don't read from stdin. */
13971     if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13972         URL = BAD_CAST "./-";
13973 
13974     uri = xmlBuildURI(URL, base);
13975 
13976     if (uri == NULL) {
13977 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13978 	if (inputStream == NULL) {
13979 	    xmlFreeParserCtxt(ctxt);
13980 	    return(NULL);
13981 	}
13982 
13983 	inputPush(ctxt, inputStream);
13984 
13985 	if ((ctxt->directory == NULL) && (directory == NULL))
13986 	    directory = xmlParserGetDirectory((char *)URL);
13987 	if ((ctxt->directory == NULL) && (directory != NULL))
13988 	    ctxt->directory = directory;
13989     } else {
13990 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13991 	if (inputStream == NULL) {
13992 	    xmlFree(uri);
13993 	    xmlFreeParserCtxt(ctxt);
13994 	    return(NULL);
13995 	}
13996 
13997 	inputPush(ctxt, inputStream);
13998 
13999 	if ((ctxt->directory == NULL) && (directory == NULL))
14000 	    directory = xmlParserGetDirectory((char *)uri);
14001 	if ((ctxt->directory == NULL) && (directory != NULL))
14002 	    ctxt->directory = directory;
14003 	xmlFree(uri);
14004     }
14005     return(ctxt);
14006 }
14007 
14008 /**
14009  * xmlCreateEntityParserCtxt:
14010  * @URL:  the entity URL
14011  * @ID:  the entity PUBLIC ID
14012  * @base:  a possible base for the target URI
14013  *
14014  * Create a parser context for an external entity
14015  * Automatic support for ZLIB/Compress compressed document is provided
14016  * by default if found at compile-time.
14017  *
14018  * Returns the new parser context or NULL
14019  */
14020 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14021 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14022 	                  const xmlChar *base) {
14023     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14024 
14025 }
14026 
14027 /************************************************************************
14028  *									*
14029  *		Front ends when parsing from a file			*
14030  *									*
14031  ************************************************************************/
14032 
14033 /**
14034  * xmlCreateURLParserCtxt:
14035  * @filename:  the filename or URL
14036  * @options:  a combination of xmlParserOption
14037  *
14038  * Create a parser context for a file or URL content.
14039  * Automatic support for ZLIB/Compress compressed document is provided
14040  * by default if found at compile-time and for file accesses
14041  *
14042  * Returns the new parser context or NULL
14043  */
14044 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14045 xmlCreateURLParserCtxt(const char *filename, int options)
14046 {
14047     xmlParserCtxtPtr ctxt;
14048     xmlParserInputPtr inputStream;
14049     char *directory = NULL;
14050 
14051     ctxt = xmlNewParserCtxt();
14052     if (ctxt == NULL) {
14053 	xmlErrMemory(NULL, "cannot allocate parser context");
14054 	return(NULL);
14055     }
14056 
14057     if (options)
14058 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14059     ctxt->linenumbers = 1;
14060 
14061     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14062     if (inputStream == NULL) {
14063 	xmlFreeParserCtxt(ctxt);
14064 	return(NULL);
14065     }
14066 
14067     inputPush(ctxt, inputStream);
14068     if ((ctxt->directory == NULL) && (directory == NULL))
14069         directory = xmlParserGetDirectory(filename);
14070     if ((ctxt->directory == NULL) && (directory != NULL))
14071         ctxt->directory = directory;
14072 
14073     return(ctxt);
14074 }
14075 
14076 /**
14077  * xmlCreateFileParserCtxt:
14078  * @filename:  the filename
14079  *
14080  * Create a parser context for a file content.
14081  * Automatic support for ZLIB/Compress compressed document is provided
14082  * by default if found at compile-time.
14083  *
14084  * Returns the new parser context or NULL
14085  */
14086 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14087 xmlCreateFileParserCtxt(const char *filename)
14088 {
14089     return(xmlCreateURLParserCtxt(filename, 0));
14090 }
14091 
14092 #ifdef LIBXML_SAX1_ENABLED
14093 /**
14094  * xmlSAXParseFileWithData:
14095  * @sax:  the SAX handler block
14096  * @filename:  the filename
14097  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14098  *             documents
14099  * @data:  the userdata
14100  *
14101  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14102  * compressed document is provided by default if found at compile-time.
14103  * It use the given SAX function block to handle the parsing callback.
14104  * If sax is NULL, fallback to the default DOM tree building routines.
14105  *
14106  * User data (void *) is stored within the parser context in the
14107  * context's _private member, so it is available nearly everywhere in libxml
14108  *
14109  * Returns the resulting document tree
14110  */
14111 
14112 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14113 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14114                         int recovery, void *data) {
14115     xmlDocPtr ret;
14116     xmlParserCtxtPtr ctxt;
14117 
14118     xmlInitParser();
14119 
14120     ctxt = xmlCreateFileParserCtxt(filename);
14121     if (ctxt == NULL) {
14122 	return(NULL);
14123     }
14124     if (sax != NULL) {
14125 	if (ctxt->sax != NULL)
14126 	    xmlFree(ctxt->sax);
14127         ctxt->sax = sax;
14128     }
14129     xmlDetectSAX2(ctxt);
14130     if (data!=NULL) {
14131 	ctxt->_private = data;
14132     }
14133 
14134     if (ctxt->directory == NULL)
14135         ctxt->directory = xmlParserGetDirectory(filename);
14136 
14137     ctxt->recovery = recovery;
14138 
14139     xmlParseDocument(ctxt);
14140 
14141     if ((ctxt->wellFormed) || recovery) {
14142         ret = ctxt->myDoc;
14143 	if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14144 	    if (ctxt->input->buf->compressed > 0)
14145 		ret->compression = 9;
14146 	    else
14147 		ret->compression = ctxt->input->buf->compressed;
14148 	}
14149     }
14150     else {
14151        ret = NULL;
14152        xmlFreeDoc(ctxt->myDoc);
14153        ctxt->myDoc = NULL;
14154     }
14155     if (sax != NULL)
14156         ctxt->sax = NULL;
14157     xmlFreeParserCtxt(ctxt);
14158 
14159     return(ret);
14160 }
14161 
14162 /**
14163  * xmlSAXParseFile:
14164  * @sax:  the SAX handler block
14165  * @filename:  the filename
14166  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14167  *             documents
14168  *
14169  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14170  * compressed document is provided by default if found at compile-time.
14171  * It use the given SAX function block to handle the parsing callback.
14172  * If sax is NULL, fallback to the default DOM tree building routines.
14173  *
14174  * Returns the resulting document tree
14175  */
14176 
14177 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14178 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14179                           int recovery) {
14180     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14181 }
14182 
14183 /**
14184  * xmlRecoverDoc:
14185  * @cur:  a pointer to an array of xmlChar
14186  *
14187  * parse an XML in-memory document and build a tree.
14188  * In the case the document is not Well Formed, a attempt to build a
14189  * tree is tried anyway
14190  *
14191  * Returns the resulting document tree or NULL in case of failure
14192  */
14193 
14194 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14195 xmlRecoverDoc(const xmlChar *cur) {
14196     return(xmlSAXParseDoc(NULL, cur, 1));
14197 }
14198 
14199 /**
14200  * xmlParseFile:
14201  * @filename:  the filename
14202  *
14203  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204  * compressed document is provided by default if found at compile-time.
14205  *
14206  * Returns the resulting document tree if the file was wellformed,
14207  * NULL otherwise.
14208  */
14209 
14210 xmlDocPtr
xmlParseFile(const char * filename)14211 xmlParseFile(const char *filename) {
14212     return(xmlSAXParseFile(NULL, filename, 0));
14213 }
14214 
14215 /**
14216  * xmlRecoverFile:
14217  * @filename:  the filename
14218  *
14219  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14220  * compressed document is provided by default if found at compile-time.
14221  * In the case the document is not Well Formed, it attempts to build
14222  * a tree anyway
14223  *
14224  * Returns the resulting document tree or NULL in case of failure
14225  */
14226 
14227 xmlDocPtr
xmlRecoverFile(const char * filename)14228 xmlRecoverFile(const char *filename) {
14229     return(xmlSAXParseFile(NULL, filename, 1));
14230 }
14231 
14232 
14233 /**
14234  * xmlSetupParserForBuffer:
14235  * @ctxt:  an XML parser context
14236  * @buffer:  a xmlChar * buffer
14237  * @filename:  a file name
14238  *
14239  * Setup the parser context to parse a new buffer; Clears any prior
14240  * contents from the parser context. The buffer parameter must not be
14241  * NULL, but the filename parameter can be
14242  */
14243 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14244 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14245                              const char* filename)
14246 {
14247     xmlParserInputPtr input;
14248 
14249     if ((ctxt == NULL) || (buffer == NULL))
14250         return;
14251 
14252     input = xmlNewInputStream(ctxt);
14253     if (input == NULL) {
14254         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14255         xmlClearParserCtxt(ctxt);
14256         return;
14257     }
14258 
14259     xmlClearParserCtxt(ctxt);
14260     if (filename != NULL)
14261         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14262     input->base = buffer;
14263     input->cur = buffer;
14264     input->end = &buffer[xmlStrlen(buffer)];
14265     inputPush(ctxt, input);
14266 }
14267 
14268 /**
14269  * xmlSAXUserParseFile:
14270  * @sax:  a SAX handler
14271  * @user_data:  The user data returned on SAX callbacks
14272  * @filename:  a file name
14273  *
14274  * parse an XML file and call the given SAX handler routines.
14275  * Automatic support for ZLIB/Compress compressed document is provided
14276  *
14277  * Returns 0 in case of success or a error number otherwise
14278  */
14279 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14280 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14281                     const char *filename) {
14282     int ret = 0;
14283     xmlParserCtxtPtr ctxt;
14284 
14285     ctxt = xmlCreateFileParserCtxt(filename);
14286     if (ctxt == NULL) return -1;
14287     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14288 	xmlFree(ctxt->sax);
14289     ctxt->sax = sax;
14290     xmlDetectSAX2(ctxt);
14291 
14292     if (user_data != NULL)
14293 	ctxt->userData = user_data;
14294 
14295     xmlParseDocument(ctxt);
14296 
14297     if (ctxt->wellFormed)
14298 	ret = 0;
14299     else {
14300         if (ctxt->errNo != 0)
14301 	    ret = ctxt->errNo;
14302 	else
14303 	    ret = -1;
14304     }
14305     if (sax != NULL)
14306 	ctxt->sax = NULL;
14307     if (ctxt->myDoc != NULL) {
14308         xmlFreeDoc(ctxt->myDoc);
14309 	ctxt->myDoc = NULL;
14310     }
14311     xmlFreeParserCtxt(ctxt);
14312 
14313     return ret;
14314 }
14315 #endif /* LIBXML_SAX1_ENABLED */
14316 
14317 /************************************************************************
14318  *									*
14319  *		Front ends when parsing from memory			*
14320  *									*
14321  ************************************************************************/
14322 
14323 /**
14324  * xmlCreateMemoryParserCtxt:
14325  * @buffer:  a pointer to a char array
14326  * @size:  the size of the array
14327  *
14328  * Create a parser context for an XML in-memory document.
14329  *
14330  * Returns the new parser context or NULL
14331  */
14332 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14333 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14334     xmlParserCtxtPtr ctxt;
14335     xmlParserInputPtr input;
14336     xmlParserInputBufferPtr buf;
14337 
14338     if (buffer == NULL)
14339 	return(NULL);
14340     if (size <= 0)
14341 	return(NULL);
14342 
14343     ctxt = xmlNewParserCtxt();
14344     if (ctxt == NULL)
14345 	return(NULL);
14346 
14347     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14348     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14349     if (buf == NULL) {
14350 	xmlFreeParserCtxt(ctxt);
14351 	return(NULL);
14352     }
14353 
14354     input = xmlNewInputStream(ctxt);
14355     if (input == NULL) {
14356 	xmlFreeParserInputBuffer(buf);
14357 	xmlFreeParserCtxt(ctxt);
14358 	return(NULL);
14359     }
14360 
14361     input->filename = NULL;
14362     input->buf = buf;
14363     xmlBufResetInput(input->buf->buffer, input);
14364 
14365     inputPush(ctxt, input);
14366     return(ctxt);
14367 }
14368 
14369 #ifdef LIBXML_SAX1_ENABLED
14370 /**
14371  * xmlSAXParseMemoryWithData:
14372  * @sax:  the SAX handler block
14373  * @buffer:  an pointer to a char array
14374  * @size:  the size of the array
14375  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14376  *             documents
14377  * @data:  the userdata
14378  *
14379  * parse an XML in-memory block and use the given SAX function block
14380  * to handle the parsing callback. If sax is NULL, fallback to the default
14381  * DOM tree building routines.
14382  *
14383  * User data (void *) is stored within the parser context in the
14384  * context's _private member, so it is available nearly everywhere in libxml
14385  *
14386  * Returns the resulting document tree
14387  */
14388 
14389 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14390 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14391 	          int size, int recovery, void *data) {
14392     xmlDocPtr ret;
14393     xmlParserCtxtPtr ctxt;
14394 
14395     xmlInitParser();
14396 
14397     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14398     if (ctxt == NULL) return(NULL);
14399     if (sax != NULL) {
14400 	if (ctxt->sax != NULL)
14401 	    xmlFree(ctxt->sax);
14402         ctxt->sax = sax;
14403     }
14404     xmlDetectSAX2(ctxt);
14405     if (data!=NULL) {
14406 	ctxt->_private=data;
14407     }
14408 
14409     ctxt->recovery = recovery;
14410 
14411     xmlParseDocument(ctxt);
14412 
14413     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14414     else {
14415        ret = NULL;
14416        xmlFreeDoc(ctxt->myDoc);
14417        ctxt->myDoc = NULL;
14418     }
14419     if (sax != NULL)
14420 	ctxt->sax = NULL;
14421     xmlFreeParserCtxt(ctxt);
14422 
14423     return(ret);
14424 }
14425 
14426 /**
14427  * xmlSAXParseMemory:
14428  * @sax:  the SAX handler block
14429  * @buffer:  an pointer to a char array
14430  * @size:  the size of the array
14431  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14432  *             documents
14433  *
14434  * parse an XML in-memory block and use the given SAX function block
14435  * to handle the parsing callback. If sax is NULL, fallback to the default
14436  * DOM tree building routines.
14437  *
14438  * Returns the resulting document tree
14439  */
14440 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14441 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14442 	          int size, int recovery) {
14443     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14444 }
14445 
14446 /**
14447  * xmlParseMemory:
14448  * @buffer:  an pointer to a char array
14449  * @size:  the size of the array
14450  *
14451  * parse an XML in-memory block and build a tree.
14452  *
14453  * Returns the resulting document tree
14454  */
14455 
xmlParseMemory(const char * buffer,int size)14456 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14457    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14458 }
14459 
14460 /**
14461  * xmlRecoverMemory:
14462  * @buffer:  an pointer to a char array
14463  * @size:  the size of the array
14464  *
14465  * parse an XML in-memory block and build a tree.
14466  * In the case the document is not Well Formed, an attempt to
14467  * build a tree is tried anyway
14468  *
14469  * Returns the resulting document tree or NULL in case of error
14470  */
14471 
xmlRecoverMemory(const char * buffer,int size)14472 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14473    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14474 }
14475 
14476 /**
14477  * xmlSAXUserParseMemory:
14478  * @sax:  a SAX handler
14479  * @user_data:  The user data returned on SAX callbacks
14480  * @buffer:  an in-memory XML document input
14481  * @size:  the length of the XML document in bytes
14482  *
14483  * A better SAX parsing routine.
14484  * parse an XML in-memory buffer and call the given SAX handler routines.
14485  *
14486  * Returns 0 in case of success or a error number otherwise
14487  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14488 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14489 			  const char *buffer, int size) {
14490     int ret = 0;
14491     xmlParserCtxtPtr ctxt;
14492 
14493     xmlInitParser();
14494 
14495     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14496     if (ctxt == NULL) return -1;
14497     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14498         xmlFree(ctxt->sax);
14499     ctxt->sax = sax;
14500     xmlDetectSAX2(ctxt);
14501 
14502     if (user_data != NULL)
14503 	ctxt->userData = user_data;
14504 
14505     xmlParseDocument(ctxt);
14506 
14507     if (ctxt->wellFormed)
14508 	ret = 0;
14509     else {
14510         if (ctxt->errNo != 0)
14511 	    ret = ctxt->errNo;
14512 	else
14513 	    ret = -1;
14514     }
14515     if (sax != NULL)
14516         ctxt->sax = NULL;
14517     if (ctxt->myDoc != NULL) {
14518         xmlFreeDoc(ctxt->myDoc);
14519 	ctxt->myDoc = NULL;
14520     }
14521     xmlFreeParserCtxt(ctxt);
14522 
14523     return ret;
14524 }
14525 #endif /* LIBXML_SAX1_ENABLED */
14526 
14527 /**
14528  * xmlCreateDocParserCtxt:
14529  * @cur:  a pointer to an array of xmlChar
14530  *
14531  * Creates a parser context for an XML in-memory document.
14532  *
14533  * Returns the new parser context or NULL
14534  */
14535 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14536 xmlCreateDocParserCtxt(const xmlChar *cur) {
14537     int len;
14538 
14539     if (cur == NULL)
14540 	return(NULL);
14541     len = xmlStrlen(cur);
14542     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14543 }
14544 
14545 #ifdef LIBXML_SAX1_ENABLED
14546 /**
14547  * xmlSAXParseDoc:
14548  * @sax:  the SAX handler block
14549  * @cur:  a pointer to an array of xmlChar
14550  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14551  *             documents
14552  *
14553  * parse an XML in-memory document and build a tree.
14554  * It use the given SAX function block to handle the parsing callback.
14555  * If sax is NULL, fallback to the default DOM tree building routines.
14556  *
14557  * Returns the resulting document tree
14558  */
14559 
14560 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14561 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14562     xmlDocPtr ret;
14563     xmlParserCtxtPtr ctxt;
14564     xmlSAXHandlerPtr oldsax = NULL;
14565 
14566     if (cur == NULL) return(NULL);
14567 
14568 
14569     ctxt = xmlCreateDocParserCtxt(cur);
14570     if (ctxt == NULL) return(NULL);
14571     if (sax != NULL) {
14572         oldsax = ctxt->sax;
14573         ctxt->sax = sax;
14574         ctxt->userData = NULL;
14575     }
14576     xmlDetectSAX2(ctxt);
14577 
14578     xmlParseDocument(ctxt);
14579     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14580     else {
14581        ret = NULL;
14582        xmlFreeDoc(ctxt->myDoc);
14583        ctxt->myDoc = NULL;
14584     }
14585     if (sax != NULL)
14586 	ctxt->sax = oldsax;
14587     xmlFreeParserCtxt(ctxt);
14588 
14589     return(ret);
14590 }
14591 
14592 /**
14593  * xmlParseDoc:
14594  * @cur:  a pointer to an array of xmlChar
14595  *
14596  * parse an XML in-memory document and build a tree.
14597  *
14598  * Returns the resulting document tree
14599  */
14600 
14601 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14602 xmlParseDoc(const xmlChar *cur) {
14603     return(xmlSAXParseDoc(NULL, cur, 0));
14604 }
14605 #endif /* LIBXML_SAX1_ENABLED */
14606 
14607 #ifdef LIBXML_LEGACY_ENABLED
14608 /************************************************************************
14609  *									*
14610  *	Specific function to keep track of entities references		*
14611  *	and used by the XSLT debugger					*
14612  *									*
14613  ************************************************************************/
14614 
14615 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14616 
14617 /**
14618  * xmlAddEntityReference:
14619  * @ent : A valid entity
14620  * @firstNode : A valid first node for children of entity
14621  * @lastNode : A valid last node of children entity
14622  *
14623  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14624  */
14625 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14626 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14627                       xmlNodePtr lastNode)
14628 {
14629     if (xmlEntityRefFunc != NULL) {
14630         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14631     }
14632 }
14633 
14634 
14635 /**
14636  * xmlSetEntityReferenceFunc:
14637  * @func: A valid function
14638  *
14639  * Set the function to call call back when a xml reference has been made
14640  */
14641 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14642 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14643 {
14644     xmlEntityRefFunc = func;
14645 }
14646 #endif /* LIBXML_LEGACY_ENABLED */
14647 
14648 /************************************************************************
14649  *									*
14650  *				Miscellaneous				*
14651  *									*
14652  ************************************************************************/
14653 
14654 #ifdef LIBXML_XPATH_ENABLED
14655 #include <libxml/xpath.h>
14656 #endif
14657 
14658 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14659 static int xmlParserInitialized = 0;
14660 
14661 /**
14662  * xmlInitParser:
14663  *
14664  * Initialization function for the XML parser.
14665  * This is not reentrant. Call once before processing in case of
14666  * use in multithreaded programs.
14667  */
14668 
14669 void
xmlInitParser(void)14670 xmlInitParser(void) {
14671     if (xmlParserInitialized != 0)
14672 	return;
14673 
14674 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14675 	atexit(xmlCleanupParser);
14676 #endif
14677 
14678 #ifdef LIBXML_THREAD_ENABLED
14679     __xmlGlobalInitMutexLock();
14680     if (xmlParserInitialized == 0) {
14681 #endif
14682 	xmlInitThreads();
14683 	xmlInitGlobals();
14684 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14685 	    (xmlGenericError == NULL))
14686 	    initGenericErrorDefaultFunc(NULL);
14687 	xmlInitMemory();
14688         xmlInitializeDict();
14689 	xmlInitCharEncodingHandlers();
14690 	xmlDefaultSAXHandlerInit();
14691 	xmlRegisterDefaultInputCallbacks();
14692 #ifdef LIBXML_OUTPUT_ENABLED
14693 	xmlRegisterDefaultOutputCallbacks();
14694 #endif /* LIBXML_OUTPUT_ENABLED */
14695 #ifdef LIBXML_HTML_ENABLED
14696 	htmlInitAutoClose();
14697 	htmlDefaultSAXHandlerInit();
14698 #endif
14699 #ifdef LIBXML_XPATH_ENABLED
14700 	xmlXPathInit();
14701 #endif
14702 	xmlParserInitialized = 1;
14703 #ifdef LIBXML_THREAD_ENABLED
14704     }
14705     __xmlGlobalInitMutexUnlock();
14706 #endif
14707 }
14708 
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op when the library is not currently initialized, and
 * it clears the initialized flag so xmlInitParser() can run again.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() never ran (or already undone). */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to initialize the library again. */
    xmlParserInitialized = 0;
}
14755 
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Function carrying the ATTRIBUTE_DESTRUCTOR marker: runs the library
 * cleanup, but only while the standard free() is still the deallocator.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14769 
14770 /************************************************************************
14771  *									*
14772  *	New set (2.6.0) of simpler and more flexible APIs		*
14773  *									*
14774  ************************************************************************/
14775 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The expansion reads a variable named "dict" that must be
 * declared by the enclosing function; a NULL @str is left alone, and a
 * NULL dict means the string is always freed.
 *
 * NOTE(review): the macro expands to a bare "if" statement that already
 * ends with a semicolon, so it is not safe as the body of an unbraced
 * if/else, and @str is evaluated more than once. Wrapping it in
 * do { } while (0) would be cleaner - confirm all call sites first.
 */
#define DICT_FREE(str)						\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14787 
14788 /**
14789  * xmlCtxtReset:
14790  * @ctxt: an XML parser context
14791  *
14792  * Reset a parser context
14793  */
14794 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14795 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14796 {
14797     xmlParserInputPtr input;
14798     xmlDictPtr dict;
14799 
14800     if (ctxt == NULL)
14801         return;
14802 
14803     dict = ctxt->dict;
14804 
14805     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14806         xmlFreeInputStream(input);
14807     }
14808     ctxt->inputNr = 0;
14809     ctxt->input = NULL;
14810 
14811     ctxt->spaceNr = 0;
14812     if (ctxt->spaceTab != NULL) {
14813 	ctxt->spaceTab[0] = -1;
14814 	ctxt->space = &ctxt->spaceTab[0];
14815     } else {
14816         ctxt->space = NULL;
14817     }
14818 
14819 
14820     ctxt->nodeNr = 0;
14821     ctxt->node = NULL;
14822 
14823     ctxt->nameNr = 0;
14824     ctxt->name = NULL;
14825 
14826     DICT_FREE(ctxt->version);
14827     ctxt->version = NULL;
14828     DICT_FREE(ctxt->encoding);
14829     ctxt->encoding = NULL;
14830     DICT_FREE(ctxt->directory);
14831     ctxt->directory = NULL;
14832     DICT_FREE(ctxt->extSubURI);
14833     ctxt->extSubURI = NULL;
14834     DICT_FREE(ctxt->extSubSystem);
14835     ctxt->extSubSystem = NULL;
14836     if (ctxt->myDoc != NULL)
14837         xmlFreeDoc(ctxt->myDoc);
14838     ctxt->myDoc = NULL;
14839 
14840     ctxt->standalone = -1;
14841     ctxt->hasExternalSubset = 0;
14842     ctxt->hasPErefs = 0;
14843     ctxt->html = 0;
14844     ctxt->external = 0;
14845     ctxt->instate = XML_PARSER_START;
14846     ctxt->token = 0;
14847 
14848     ctxt->wellFormed = 1;
14849     ctxt->nsWellFormed = 1;
14850     ctxt->disableSAX = 0;
14851     ctxt->valid = 1;
14852 #if 0
14853     ctxt->vctxt.userData = ctxt;
14854     ctxt->vctxt.error = xmlParserValidityError;
14855     ctxt->vctxt.warning = xmlParserValidityWarning;
14856 #endif
14857     ctxt->record_info = 0;
14858     ctxt->checkIndex = 0;
14859     ctxt->inSubset = 0;
14860     ctxt->errNo = XML_ERR_OK;
14861     ctxt->depth = 0;
14862     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14863     ctxt->catalogs = NULL;
14864     ctxt->nbentities = 0;
14865     ctxt->sizeentities = 0;
14866     ctxt->sizeentcopy = 0;
14867     xmlInitNodeInfoSeq(&ctxt->node_seq);
14868 
14869     if (ctxt->attsDefault != NULL) {
14870         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14871         ctxt->attsDefault = NULL;
14872     }
14873     if (ctxt->attsSpecial != NULL) {
14874         xmlHashFree(ctxt->attsSpecial, NULL);
14875         ctxt->attsSpecial = NULL;
14876     }
14877 
14878 #ifdef LIBXML_CATALOG_ENABLED
14879     if (ctxt->catalogs != NULL)
14880 	xmlCatalogFreeLocal(ctxt->catalogs);
14881 #endif
14882     if (ctxt->lastError.code != XML_ERR_OK)
14883         xmlResetError(&ctxt->lastError);
14884 }
14885 
14886 /**
14887  * xmlCtxtResetPush:
14888  * @ctxt: an XML parser context
14889  * @chunk:  a pointer to an array of chars
14890  * @size:  number of chars in the array
14891  * @filename:  an optional file name or URI
14892  * @encoding:  the document encoding, or NULL
14893  *
14894  * Reset a push parser context
14895  *
14896  * Returns 0 in case of success and 1 in case of error
14897  */
14898 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14899 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14900                  int size, const char *filename, const char *encoding)
14901 {
14902     xmlParserInputPtr inputStream;
14903     xmlParserInputBufferPtr buf;
14904     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14905 
14906     if (ctxt == NULL)
14907         return(1);
14908 
14909     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14910         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14911 
14912     buf = xmlAllocParserInputBuffer(enc);
14913     if (buf == NULL)
14914         return(1);
14915 
14916     if (ctxt == NULL) {
14917         xmlFreeParserInputBuffer(buf);
14918         return(1);
14919     }
14920 
14921     xmlCtxtReset(ctxt);
14922 
14923     if (filename == NULL) {
14924         ctxt->directory = NULL;
14925     } else {
14926         ctxt->directory = xmlParserGetDirectory(filename);
14927     }
14928 
14929     inputStream = xmlNewInputStream(ctxt);
14930     if (inputStream == NULL) {
14931         xmlFreeParserInputBuffer(buf);
14932         return(1);
14933     }
14934 
14935     if (filename == NULL)
14936         inputStream->filename = NULL;
14937     else
14938         inputStream->filename = (char *)
14939             xmlCanonicPath((const xmlChar *) filename);
14940     inputStream->buf = buf;
14941     xmlBufResetInput(buf->buffer, inputStream);
14942 
14943     inputPush(ctxt, inputStream);
14944 
14945     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14946         (ctxt->input->buf != NULL)) {
14947 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14948         size_t cur = ctxt->input->cur - ctxt->input->base;
14949 
14950         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14951 
14952         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14953 #ifdef DEBUG_PUSH
14954         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14955 #endif
14956     }
14957 
14958     if (encoding != NULL) {
14959         xmlCharEncodingHandlerPtr hdlr;
14960 
14961         if (ctxt->encoding != NULL)
14962 	    xmlFree((xmlChar *) ctxt->encoding);
14963         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14964 
14965         hdlr = xmlFindCharEncodingHandler(encoding);
14966         if (hdlr != NULL) {
14967             xmlSwitchToEncoding(ctxt, hdlr);
14968 	} else {
14969 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14970 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14971         }
14972     } else if (enc != XML_CHAR_ENCODING_NONE) {
14973         xmlSwitchEncoding(ctxt, enc);
14974     }
14975 
14976     return(0);
14977 }
14978 
14979 
14980 /**
14981  * xmlCtxtUseOptionsInternal:
14982  * @ctxt: an XML parser context
14983  * @options:  a combination of xmlParserOption
14984  * @encoding:  the user provided encoding to use
14985  *
14986  * Applies the options to the parser context
14987  *
14988  * Returns 0 in case of success, the set of unknown or unimplemented options
14989  *         in case of error.
14990  */
14991 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14992 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14993 {
14994     if (ctxt == NULL)
14995         return(-1);
14996     if (encoding != NULL) {
14997         if (ctxt->encoding != NULL)
14998 	    xmlFree((xmlChar *) ctxt->encoding);
14999         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15000     }
15001     if (options & XML_PARSE_RECOVER) {
15002         ctxt->recovery = 1;
15003         options -= XML_PARSE_RECOVER;
15004 	ctxt->options |= XML_PARSE_RECOVER;
15005     } else
15006         ctxt->recovery = 0;
15007     if (options & XML_PARSE_DTDLOAD) {
15008         ctxt->loadsubset = XML_DETECT_IDS;
15009         options -= XML_PARSE_DTDLOAD;
15010 	ctxt->options |= XML_PARSE_DTDLOAD;
15011     } else
15012         ctxt->loadsubset = 0;
15013     if (options & XML_PARSE_DTDATTR) {
15014         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15015         options -= XML_PARSE_DTDATTR;
15016 	ctxt->options |= XML_PARSE_DTDATTR;
15017     }
15018     if (options & XML_PARSE_NOENT) {
15019         ctxt->replaceEntities = 1;
15020         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15021         options -= XML_PARSE_NOENT;
15022 	ctxt->options |= XML_PARSE_NOENT;
15023     } else
15024         ctxt->replaceEntities = 0;
15025     if (options & XML_PARSE_PEDANTIC) {
15026         ctxt->pedantic = 1;
15027         options -= XML_PARSE_PEDANTIC;
15028 	ctxt->options |= XML_PARSE_PEDANTIC;
15029     } else
15030         ctxt->pedantic = 0;
15031     if (options & XML_PARSE_NOBLANKS) {
15032         ctxt->keepBlanks = 0;
15033         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15034         options -= XML_PARSE_NOBLANKS;
15035 	ctxt->options |= XML_PARSE_NOBLANKS;
15036     } else
15037         ctxt->keepBlanks = 1;
15038     if (options & XML_PARSE_DTDVALID) {
15039         ctxt->validate = 1;
15040         if (options & XML_PARSE_NOWARNING)
15041             ctxt->vctxt.warning = NULL;
15042         if (options & XML_PARSE_NOERROR)
15043             ctxt->vctxt.error = NULL;
15044         options -= XML_PARSE_DTDVALID;
15045 	ctxt->options |= XML_PARSE_DTDVALID;
15046     } else
15047         ctxt->validate = 0;
15048     if (options & XML_PARSE_NOWARNING) {
15049         ctxt->sax->warning = NULL;
15050         options -= XML_PARSE_NOWARNING;
15051     }
15052     if (options & XML_PARSE_NOERROR) {
15053         ctxt->sax->error = NULL;
15054         ctxt->sax->fatalError = NULL;
15055         options -= XML_PARSE_NOERROR;
15056     }
15057 #ifdef LIBXML_SAX1_ENABLED
15058     if (options & XML_PARSE_SAX1) {
15059         ctxt->sax->startElement = xmlSAX2StartElement;
15060         ctxt->sax->endElement = xmlSAX2EndElement;
15061         ctxt->sax->startElementNs = NULL;
15062         ctxt->sax->endElementNs = NULL;
15063         ctxt->sax->initialized = 1;
15064         options -= XML_PARSE_SAX1;
15065 	ctxt->options |= XML_PARSE_SAX1;
15066     }
15067 #endif /* LIBXML_SAX1_ENABLED */
15068     if (options & XML_PARSE_NODICT) {
15069         ctxt->dictNames = 0;
15070         options -= XML_PARSE_NODICT;
15071 	ctxt->options |= XML_PARSE_NODICT;
15072     } else {
15073         ctxt->dictNames = 1;
15074     }
15075     if (options & XML_PARSE_NOCDATA) {
15076         ctxt->sax->cdataBlock = NULL;
15077         options -= XML_PARSE_NOCDATA;
15078 	ctxt->options |= XML_PARSE_NOCDATA;
15079     }
15080     if (options & XML_PARSE_NSCLEAN) {
15081 	ctxt->options |= XML_PARSE_NSCLEAN;
15082         options -= XML_PARSE_NSCLEAN;
15083     }
15084     if (options & XML_PARSE_NONET) {
15085 	ctxt->options |= XML_PARSE_NONET;
15086         options -= XML_PARSE_NONET;
15087     }
15088     if (options & XML_PARSE_COMPACT) {
15089 	ctxt->options |= XML_PARSE_COMPACT;
15090         options -= XML_PARSE_COMPACT;
15091     }
15092     if (options & XML_PARSE_OLD10) {
15093 	ctxt->options |= XML_PARSE_OLD10;
15094         options -= XML_PARSE_OLD10;
15095     }
15096     if (options & XML_PARSE_NOBASEFIX) {
15097 	ctxt->options |= XML_PARSE_NOBASEFIX;
15098         options -= XML_PARSE_NOBASEFIX;
15099     }
15100     if (options & XML_PARSE_HUGE) {
15101 	ctxt->options |= XML_PARSE_HUGE;
15102         options -= XML_PARSE_HUGE;
15103         if (ctxt->dict != NULL)
15104             xmlDictSetLimit(ctxt->dict, 0);
15105     }
15106     if (options & XML_PARSE_OLDSAX) {
15107 	ctxt->options |= XML_PARSE_OLDSAX;
15108         options -= XML_PARSE_OLDSAX;
15109     }
15110     if (options & XML_PARSE_IGNORE_ENC) {
15111 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15112         options -= XML_PARSE_IGNORE_ENC;
15113     }
15114     if (options & XML_PARSE_BIG_LINES) {
15115 	ctxt->options |= XML_PARSE_BIG_LINES;
15116         options -= XML_PARSE_BIG_LINES;
15117     }
15118     ctxt->linenumbers = 1;
15119     return (options);
15120 }
15121 
15122 /**
15123  * xmlCtxtUseOptions:
15124  * @ctxt: an XML parser context
15125  * @options:  a combination of xmlParserOption
15126  *
15127  * Applies the options to the parser context
15128  *
15129  * Returns 0 in case of success, the set of unknown or unimplemented options
15130  *         in case of error.
15131  */
15132 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15133 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15134 {
15135    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15136 }
15137 
15138 /**
15139  * xmlDoRead:
15140  * @ctxt:  an XML parser context
15141  * @URL:  the base URL to use for the document
15142  * @encoding:  the document encoding, or NULL
15143  * @options:  a combination of xmlParserOption
15144  * @reuse:  keep the context for reuse
15145  *
15146  * Common front-end for the xmlRead functions
15147  *
15148  * Returns the resulting document tree or NULL
15149  */
15150 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15151 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15152           int options, int reuse)
15153 {
15154     xmlDocPtr ret;
15155 
15156     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15157     if (encoding != NULL) {
15158         xmlCharEncodingHandlerPtr hdlr;
15159 
15160 	hdlr = xmlFindCharEncodingHandler(encoding);
15161 	if (hdlr != NULL)
15162 	    xmlSwitchToEncoding(ctxt, hdlr);
15163     }
15164     if ((URL != NULL) && (ctxt->input != NULL) &&
15165         (ctxt->input->filename == NULL))
15166         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15167     xmlParseDocument(ctxt);
15168     if ((ctxt->wellFormed) || ctxt->recovery)
15169         ret = ctxt->myDoc;
15170     else {
15171         ret = NULL;
15172 	if (ctxt->myDoc != NULL) {
15173 	    xmlFreeDoc(ctxt->myDoc);
15174 	}
15175     }
15176     ctxt->myDoc = NULL;
15177     if (!reuse) {
15178 	xmlFreeParserCtxt(ctxt);
15179     }
15180 
15181     return (ret);
15182 }
15183 
15184 /**
15185  * xmlReadDoc:
15186  * @cur:  a pointer to a zero terminated string
15187  * @URL:  the base URL to use for the document
15188  * @encoding:  the document encoding, or NULL
15189  * @options:  a combination of xmlParserOption
15190  *
15191  * parse an XML in-memory document and build a tree.
15192  *
15193  * Returns the resulting document tree
15194  */
15195 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15196 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15197 {
15198     xmlParserCtxtPtr ctxt;
15199 
15200     if (cur == NULL)
15201         return (NULL);
15202     xmlInitParser();
15203 
15204     ctxt = xmlCreateDocParserCtxt(cur);
15205     if (ctxt == NULL)
15206         return (NULL);
15207     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15208 }
15209 
15210 /**
15211  * xmlReadFile:
15212  * @filename:  a file or URL
15213  * @encoding:  the document encoding, or NULL
15214  * @options:  a combination of xmlParserOption
15215  *
15216  * parse an XML file from the filesystem or the network.
15217  *
15218  * Returns the resulting document tree
15219  */
15220 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15221 xmlReadFile(const char *filename, const char *encoding, int options)
15222 {
15223     xmlParserCtxtPtr ctxt;
15224 
15225     xmlInitParser();
15226     ctxt = xmlCreateURLParserCtxt(filename, options);
15227     if (ctxt == NULL)
15228         return (NULL);
15229     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15230 }
15231 
15232 /**
15233  * xmlReadMemory:
15234  * @buffer:  a pointer to a char array
15235  * @size:  the size of the array
15236  * @URL:  the base URL to use for the document
15237  * @encoding:  the document encoding, or NULL
15238  * @options:  a combination of xmlParserOption
15239  *
15240  * parse an XML in-memory document and build a tree.
15241  *
15242  * Returns the resulting document tree
15243  */
15244 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15245 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15246 {
15247     xmlParserCtxtPtr ctxt;
15248 
15249     xmlInitParser();
15250     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15251     if (ctxt == NULL)
15252         return (NULL);
15253     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15254 }
15255 
15256 /**
15257  * xmlReadFd:
15258  * @fd:  an open file descriptor
15259  * @URL:  the base URL to use for the document
15260  * @encoding:  the document encoding, or NULL
15261  * @options:  a combination of xmlParserOption
15262  *
15263  * parse an XML from a file descriptor and build a tree.
15264  * NOTE that the file descriptor will not be closed when the
15265  *      reader is closed or reset.
15266  *
15267  * Returns the resulting document tree
15268  */
15269 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15270 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15271 {
15272     xmlParserCtxtPtr ctxt;
15273     xmlParserInputBufferPtr input;
15274     xmlParserInputPtr stream;
15275 
15276     if (fd < 0)
15277         return (NULL);
15278     xmlInitParser();
15279 
15280     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15281     if (input == NULL)
15282         return (NULL);
15283     input->closecallback = NULL;
15284     ctxt = xmlNewParserCtxt();
15285     if (ctxt == NULL) {
15286         xmlFreeParserInputBuffer(input);
15287         return (NULL);
15288     }
15289     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15290     if (stream == NULL) {
15291         xmlFreeParserInputBuffer(input);
15292 	xmlFreeParserCtxt(ctxt);
15293         return (NULL);
15294     }
15295     inputPush(ctxt, stream);
15296     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15297 }
15298 
15299 /**
15300  * xmlReadIO:
15301  * @ioread:  an I/O read function
15302  * @ioclose:  an I/O close function
15303  * @ioctx:  an I/O handler
15304  * @URL:  the base URL to use for the document
15305  * @encoding:  the document encoding, or NULL
15306  * @options:  a combination of xmlParserOption
15307  *
15308  * parse an XML document from I/O functions and source and build a tree.
15309  *
15310  * Returns the resulting document tree
15311  */
15312 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15313 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15314           void *ioctx, const char *URL, const char *encoding, int options)
15315 {
15316     xmlParserCtxtPtr ctxt;
15317     xmlParserInputBufferPtr input;
15318     xmlParserInputPtr stream;
15319 
15320     if (ioread == NULL)
15321         return (NULL);
15322     xmlInitParser();
15323 
15324     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15325                                          XML_CHAR_ENCODING_NONE);
15326     if (input == NULL) {
15327         if (ioclose != NULL)
15328             ioclose(ioctx);
15329         return (NULL);
15330     }
15331     ctxt = xmlNewParserCtxt();
15332     if (ctxt == NULL) {
15333         xmlFreeParserInputBuffer(input);
15334         return (NULL);
15335     }
15336     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15337     if (stream == NULL) {
15338         xmlFreeParserInputBuffer(input);
15339 	xmlFreeParserCtxt(ctxt);
15340         return (NULL);
15341     }
15342     inputPush(ctxt, stream);
15343     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15344 }
15345 
15346 /**
15347  * xmlCtxtReadDoc:
15348  * @ctxt:  an XML parser context
15349  * @cur:  a pointer to a zero terminated string
15350  * @URL:  the base URL to use for the document
15351  * @encoding:  the document encoding, or NULL
15352  * @options:  a combination of xmlParserOption
15353  *
15354  * parse an XML in-memory document and build a tree.
15355  * This reuses the existing @ctxt parser context
15356  *
15357  * Returns the resulting document tree
15358  */
15359 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15360 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15361                const char *URL, const char *encoding, int options)
15362 {
15363     xmlParserInputPtr stream;
15364 
15365     if (cur == NULL)
15366         return (NULL);
15367     if (ctxt == NULL)
15368         return (NULL);
15369     xmlInitParser();
15370 
15371     xmlCtxtReset(ctxt);
15372 
15373     stream = xmlNewStringInputStream(ctxt, cur);
15374     if (stream == NULL) {
15375         return (NULL);
15376     }
15377     inputPush(ctxt, stream);
15378     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15379 }
15380 
15381 /**
15382  * xmlCtxtReadFile:
15383  * @ctxt:  an XML parser context
15384  * @filename:  a file or URL
15385  * @encoding:  the document encoding, or NULL
15386  * @options:  a combination of xmlParserOption
15387  *
15388  * parse an XML file from the filesystem or the network.
15389  * This reuses the existing @ctxt parser context
15390  *
15391  * Returns the resulting document tree
15392  */
15393 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15394 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15395                 const char *encoding, int options)
15396 {
15397     xmlParserInputPtr stream;
15398 
15399     if (filename == NULL)
15400         return (NULL);
15401     if (ctxt == NULL)
15402         return (NULL);
15403     xmlInitParser();
15404 
15405     xmlCtxtReset(ctxt);
15406 
15407     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15408     if (stream == NULL) {
15409         return (NULL);
15410     }
15411     inputPush(ctxt, stream);
15412     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15413 }
15414 
15415 /**
15416  * xmlCtxtReadMemory:
15417  * @ctxt:  an XML parser context
15418  * @buffer:  a pointer to a char array
15419  * @size:  the size of the array
15420  * @URL:  the base URL to use for the document
15421  * @encoding:  the document encoding, or NULL
15422  * @options:  a combination of xmlParserOption
15423  *
15424  * parse an XML in-memory document and build a tree.
15425  * This reuses the existing @ctxt parser context
15426  *
15427  * Returns the resulting document tree
15428  */
15429 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15430 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15431                   const char *URL, const char *encoding, int options)
15432 {
15433     xmlParserInputBufferPtr input;
15434     xmlParserInputPtr stream;
15435 
15436     if (ctxt == NULL)
15437         return (NULL);
15438     if (buffer == NULL)
15439         return (NULL);
15440     xmlInitParser();
15441 
15442     xmlCtxtReset(ctxt);
15443 
15444     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15445     if (input == NULL) {
15446 	return(NULL);
15447     }
15448 
15449     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15450     if (stream == NULL) {
15451 	xmlFreeParserInputBuffer(input);
15452 	return(NULL);
15453     }
15454 
15455     inputPush(ctxt, stream);
15456     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15457 }
15458 
15459 /**
15460  * xmlCtxtReadFd:
15461  * @ctxt:  an XML parser context
15462  * @fd:  an open file descriptor
15463  * @URL:  the base URL to use for the document
15464  * @encoding:  the document encoding, or NULL
15465  * @options:  a combination of xmlParserOption
15466  *
15467  * parse an XML from a file descriptor and build a tree.
15468  * This reuses the existing @ctxt parser context
15469  * NOTE that the file descriptor will not be closed when the
15470  *      reader is closed or reset.
15471  *
15472  * Returns the resulting document tree
15473  */
15474 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15475 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15476               const char *URL, const char *encoding, int options)
15477 {
15478     xmlParserInputBufferPtr input;
15479     xmlParserInputPtr stream;
15480 
15481     if (fd < 0)
15482         return (NULL);
15483     if (ctxt == NULL)
15484         return (NULL);
15485     xmlInitParser();
15486 
15487     xmlCtxtReset(ctxt);
15488 
15489 
15490     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15491     if (input == NULL)
15492         return (NULL);
15493     input->closecallback = NULL;
15494     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15495     if (stream == NULL) {
15496         xmlFreeParserInputBuffer(input);
15497         return (NULL);
15498     }
15499     inputPush(ctxt, stream);
15500     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15501 }
15502 
15503 /**
15504  * xmlCtxtReadIO:
15505  * @ctxt:  an XML parser context
15506  * @ioread:  an I/O read function
15507  * @ioclose:  an I/O close function
15508  * @ioctx:  an I/O handler
15509  * @URL:  the base URL to use for the document
15510  * @encoding:  the document encoding, or NULL
15511  * @options:  a combination of xmlParserOption
15512  *
15513  * parse an XML document from I/O functions and source and build a tree.
15514  * This reuses the existing @ctxt parser context
15515  *
15516  * Returns the resulting document tree
15517  */
15518 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15519 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15520               xmlInputCloseCallback ioclose, void *ioctx,
15521 	      const char *URL,
15522               const char *encoding, int options)
15523 {
15524     xmlParserInputBufferPtr input;
15525     xmlParserInputPtr stream;
15526 
15527     if (ioread == NULL)
15528         return (NULL);
15529     if (ctxt == NULL)
15530         return (NULL);
15531     xmlInitParser();
15532 
15533     xmlCtxtReset(ctxt);
15534 
15535     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15536                                          XML_CHAR_ENCODING_NONE);
15537     if (input == NULL) {
15538         if (ioclose != NULL)
15539             ioclose(ioctx);
15540         return (NULL);
15541     }
15542     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15543     if (stream == NULL) {
15544         xmlFreeParserInputBuffer(input);
15545         return (NULL);
15546     }
15547     inputPush(ctxt, stream);
15548     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15549 }
15550 
15551 #define bottom_parser
15552 #include "elfgcchack.h"
15553