1 /* libxml2 - Library for parsing XML documents
2  * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3  *
4  * This file is not part of the GNU gettext program, but is used with
5  * GNU gettext.
6  *
7  * The original copyright notice is as follows:
8  */
9 
10 /*
11  * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is fur-
18  * nished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25  * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  * daniel@veillard.com
32  */
33 
34 /*
35  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
36  *            implemented on top of the SAX interfaces
37  *
38  * References:
39  *   The XML specification:
40  *     http://www.w3.org/TR/REC-xml
41  *   Original 1.0 version:
42  *     http://www.w3.org/TR/1998/REC-xml-19980210
43  *   XML second edition working draft
44  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
45  *
46  * Okay this is a big file, the parser core is around 7000 lines, then it
47  * is followed by the progressive parser top routines, then the various
48  * high level APIs to call the parser and a few miscellaneous functions.
49  * A number of helper functions and deprecated ones have been moved to
50  * parserInternals.c to reduce this file size.
51  * As much as possible the functions are associated with their relative
52  * production in the XML specification. A few productions defining the
53  * different ranges of character are actually implemented either in
54  * parserInternals.h or parserInternals.c.
55  * The DOM tree build is realized from the default SAX callbacks in
56  * the module SAX.c.
57  * The routines doing the validation checks are in valid.c and called either
58  * from the SAX callbacks or as standalone functions using a preparsed
59  * document.
60  */
61 
62 /* To avoid EBCDIC trouble when parsing on zOS */
63 #if defined(__MVS__)
64 #pragma convert("ISO8859-1")
65 #endif
66 
67 #define IN_LIBXML
68 #include "libxml.h"
69 
70 #if defined(_WIN32) && !defined (__CYGWIN__)
71 #define XML_DIR_SEP '\\'
72 #else
73 #define XML_DIR_SEP '/'
74 #endif
75 
76 #include <stdlib.h>
77 #include <limits.h>
78 #include <string.h>
79 #include <stdarg.h>
80 #include <stddef.h>
81 #include <libxml/xmlmemory.h>
82 #include <libxml/threads.h>
83 #include <libxml/globals.h>
84 #include <libxml/tree.h>
85 #include <libxml/parser.h>
86 #include <libxml/parserInternals.h>
87 #include <libxml/valid.h>
88 #include <libxml/entities.h>
89 #include <libxml/xmlerror.h>
90 #include <libxml/encoding.h>
91 #include <libxml/xmlIO.h>
92 #include <libxml/uri.h>
93 #ifdef LIBXML_CATALOG_ENABLED
94 #include <libxml/catalog.h>
95 #endif
96 #ifdef LIBXML_SCHEMAS_ENABLED
97 #include <libxml/xmlschemastypes.h>
98 #include <libxml/relaxng.h>
99 #endif
100 #ifdef HAVE_CTYPE_H
101 #include <ctype.h>
102 #endif
103 #ifdef HAVE_STDLIB_H
104 #include <stdlib.h>
105 #endif
106 #ifdef HAVE_SYS_STAT_H
107 #include <sys/stat.h>
108 #endif
109 #ifdef HAVE_FCNTL_H
110 #include <fcntl.h>
111 #endif
112 #ifdef HAVE_UNISTD_H
113 #include <unistd.h>
114 #endif
115 
116 #include "buf.h"
117 #include "enc.h"
118 
119 static void
120 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
121 
122 static xmlParserCtxtPtr
123 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
124 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
125 
126 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
127 
128 /************************************************************************
129  *									*
130  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
131  *									*
132  ************************************************************************/
133 
/*
 * XML_PARSER_BIG_ENTITY is the replacement size under which
 * xmlParserEntityCheck() does not bother checking the expansion ratio.
 * XML_PARSER_LOT_ENTITY is not referenced by the nearby code; it is kept
 * for the rest of the parser.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you
 *    have an exponential behaviour. A value of 10 corresponds to at least
 *    3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
144 
145 /*
146  * xmlParserEntityCheck
147  *
148  * Function to check non-linear entity expansion behaviour
149  * This is here to detect and stop exponential linear entity expansion
150  * This is not a limitation of the parser but a safety
151  * boundary feature. It can be disabled with the XML_PARSE_HUGE
152  * parser option.
153  */
154 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)155 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
156                      xmlEntityPtr ent, size_t replacement)
157 {
158     size_t consumed = 0;
159 
160     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
161         return (0);
162     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
163         return (1);
164 
165     /*
166      * This may look absurd but is needed to detect
167      * entities problems
168      */
169     if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
170 	(ent->content != NULL) && (ent->checked == 0) &&
171 	(ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
172 	unsigned long oldnbent = ctxt->nbentities;
173 	xmlChar *rep;
174 
175 	ent->checked = 1;
176 
177         ++ctxt->depth;
178 	rep = xmlStringDecodeEntities(ctxt, ent->content,
179 				  XML_SUBSTITUTE_REF, 0, 0, 0);
180         --ctxt->depth;
181 	if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
182 	    ent->content[0] = 0;
183 	}
184 
185 	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
186 	if (rep != NULL) {
187 	    if (xmlStrchr(rep, '<'))
188 		ent->checked |= 1;
189 	    xmlFree(rep);
190 	    rep = NULL;
191 	}
192     }
193     if (replacement != 0) {
194 	if (replacement < XML_MAX_TEXT_LENGTH)
195 	    return(0);
196 
197         /*
198 	 * If the volume of entity copy reaches 10 times the
199 	 * amount of parsed data and over the large text threshold
200 	 * then that's very likely to be an abuse.
201 	 */
202         if (ctxt->input != NULL) {
203 	    consumed = ctxt->input->consumed +
204 	               (ctxt->input->cur - ctxt->input->base);
205 	}
206         consumed += ctxt->sizeentities;
207 
208         if (replacement < XML_PARSER_NON_LINEAR * consumed)
209 	    return(0);
210     } else if (size != 0) {
211         /*
212          * Do the check based on the replacement size of the entity
213          */
214         if (size < XML_PARSER_BIG_ENTITY)
215 	    return(0);
216 
217         /*
218          * A limit on the amount of text data reasonably used
219          */
220         if (ctxt->input != NULL) {
221             consumed = ctxt->input->consumed +
222                 (ctxt->input->cur - ctxt->input->base);
223         }
224         consumed += ctxt->sizeentities;
225 
226         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
227 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
228             return (0);
229     } else if (ent != NULL) {
230         /*
231          * use the number of parsed entities in the replacement
232          */
233         size = ent->checked / 2;
234 
235         /*
236          * The amount of data parsed counting entities size only once
237          */
238         if (ctxt->input != NULL) {
239             consumed = ctxt->input->consumed +
240                 (ctxt->input->cur - ctxt->input->base);
241         }
242         consumed += ctxt->sizeentities;
243 
244         /*
245          * Check the density of entities for the amount of data
246 	 * knowing an entity reference will take at least 3 bytes
247          */
248         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
249             return (0);
250     } else {
251         /*
252          * strange we got no data for checking
253          */
254 	if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
255 	     (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
256 	    (ctxt->nbentities <= 10000))
257 	    return (0);
258     }
259     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
260     return (1);
261 }
262 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. This is a safety boundary rather than a limitation of the
 * parser; it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
272 
273 
274 
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
290 
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
300 
301 
302 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
303 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
304                                               const xmlChar **str);
305 
306 static xmlParserErrors
307 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
308 	              xmlSAXHandlerPtr sax,
309 		      void *user_data, int depth, const xmlChar *URL,
310 		      const xmlChar *ID, xmlNodePtr *list);
311 
312 static int
313 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
314                           const char *encoding);
315 #ifdef LIBXML_LEGACY_ENABLED
316 static void
317 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
318                       xmlNodePtr lastNode);
319 #endif /* LIBXML_LEGACY_ENABLED */
320 
321 static xmlParserErrors
322 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
323 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
324 
325 static int
326 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
327 
328 /************************************************************************
329  *									*
330  *		Some factorized error routines				*
331  *									*
332  ************************************************************************/
333 
334 /**
335  * xmlErrAttributeDup:
336  * @ctxt:  an XML parser context
337  * @prefix:  the attribute prefix
338  * @localname:  the attribute localname
339  *
340  * Handle a redefinition of attribute error
341  */
342 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)343 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
344                    const xmlChar * localname)
345 {
346     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
347         (ctxt->instate == XML_PARSER_EOF))
348 	return;
349     if (ctxt != NULL)
350 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
351 
352     if (prefix == NULL)
353         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
354                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
355                         (const char *) localname, NULL, NULL, 0, 0,
356                         "Attribute %s redefined\n", localname);
357     else
358         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
359                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
360                         (const char *) prefix, (const char *) localname,
361                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
362                         localname);
363     if (ctxt != NULL) {
364 	ctxt->wellFormed = 0;
365 	if (ctxt->recovery == 0)
366 	    ctxt->disableSAX = 1;
367     }
368 }
369 
370 /**
371  * xmlFatalErr:
372  * @ctxt:  an XML parser context
373  * @error:  the error number
374  * @extra:  extra information string
375  *
376  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
377  */
378 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)379 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
380 {
381     const char *errmsg;
382 
383     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384         (ctxt->instate == XML_PARSER_EOF))
385 	return;
386     switch (error) {
387         case XML_ERR_INVALID_HEX_CHARREF:
388             errmsg = "CharRef: invalid hexadecimal value";
389             break;
390         case XML_ERR_INVALID_DEC_CHARREF:
391             errmsg = "CharRef: invalid decimal value";
392             break;
393         case XML_ERR_INVALID_CHARREF:
394             errmsg = "CharRef: invalid value";
395             break;
396         case XML_ERR_INTERNAL_ERROR:
397             errmsg = "internal error";
398             break;
399         case XML_ERR_PEREF_AT_EOF:
400             errmsg = "PEReference at end of document";
401             break;
402         case XML_ERR_PEREF_IN_PROLOG:
403             errmsg = "PEReference in prolog";
404             break;
405         case XML_ERR_PEREF_IN_EPILOG:
406             errmsg = "PEReference in epilog";
407             break;
408         case XML_ERR_PEREF_NO_NAME:
409             errmsg = "PEReference: no name";
410             break;
411         case XML_ERR_PEREF_SEMICOL_MISSING:
412             errmsg = "PEReference: expecting ';'";
413             break;
414         case XML_ERR_ENTITY_LOOP:
415             errmsg = "Detected an entity reference loop";
416             break;
417         case XML_ERR_ENTITY_NOT_STARTED:
418             errmsg = "EntityValue: \" or ' expected";
419             break;
420         case XML_ERR_ENTITY_PE_INTERNAL:
421             errmsg = "PEReferences forbidden in internal subset";
422             break;
423         case XML_ERR_ENTITY_NOT_FINISHED:
424             errmsg = "EntityValue: \" or ' expected";
425             break;
426         case XML_ERR_ATTRIBUTE_NOT_STARTED:
427             errmsg = "AttValue: \" or ' expected";
428             break;
429         case XML_ERR_LT_IN_ATTRIBUTE:
430             errmsg = "Unescaped '<' not allowed in attributes values";
431             break;
432         case XML_ERR_LITERAL_NOT_STARTED:
433             errmsg = "SystemLiteral \" or ' expected";
434             break;
435         case XML_ERR_LITERAL_NOT_FINISHED:
436             errmsg = "Unfinished System or Public ID \" or ' expected";
437             break;
438         case XML_ERR_MISPLACED_CDATA_END:
439             errmsg = "Sequence ']]>' not allowed in content";
440             break;
441         case XML_ERR_URI_REQUIRED:
442             errmsg = "SYSTEM or PUBLIC, the URI is missing";
443             break;
444         case XML_ERR_PUBID_REQUIRED:
445             errmsg = "PUBLIC, the Public Identifier is missing";
446             break;
447         case XML_ERR_HYPHEN_IN_COMMENT:
448             errmsg = "Comment must not contain '--' (double-hyphen)";
449             break;
450         case XML_ERR_PI_NOT_STARTED:
451             errmsg = "xmlParsePI : no target name";
452             break;
453         case XML_ERR_RESERVED_XML_NAME:
454             errmsg = "Invalid PI name";
455             break;
456         case XML_ERR_NOTATION_NOT_STARTED:
457             errmsg = "NOTATION: Name expected here";
458             break;
459         case XML_ERR_NOTATION_NOT_FINISHED:
460             errmsg = "'>' required to close NOTATION declaration";
461             break;
462         case XML_ERR_VALUE_REQUIRED:
463             errmsg = "Entity value required";
464             break;
465         case XML_ERR_URI_FRAGMENT:
466             errmsg = "Fragment not allowed";
467             break;
468         case XML_ERR_ATTLIST_NOT_STARTED:
469             errmsg = "'(' required to start ATTLIST enumeration";
470             break;
471         case XML_ERR_NMTOKEN_REQUIRED:
472             errmsg = "NmToken expected in ATTLIST enumeration";
473             break;
474         case XML_ERR_ATTLIST_NOT_FINISHED:
475             errmsg = "')' required to finish ATTLIST enumeration";
476             break;
477         case XML_ERR_MIXED_NOT_STARTED:
478             errmsg = "MixedContentDecl : '|' or ')*' expected";
479             break;
480         case XML_ERR_PCDATA_REQUIRED:
481             errmsg = "MixedContentDecl : '#PCDATA' expected";
482             break;
483         case XML_ERR_ELEMCONTENT_NOT_STARTED:
484             errmsg = "ContentDecl : Name or '(' expected";
485             break;
486         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
487             errmsg = "ContentDecl : ',' '|' or ')' expected";
488             break;
489         case XML_ERR_PEREF_IN_INT_SUBSET:
490             errmsg =
491                 "PEReference: forbidden within markup decl in internal subset";
492             break;
493         case XML_ERR_GT_REQUIRED:
494             errmsg = "expected '>'";
495             break;
496         case XML_ERR_CONDSEC_INVALID:
497             errmsg = "XML conditional section '[' expected";
498             break;
499         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
500             errmsg = "Content error in the external subset";
501             break;
502         case XML_ERR_CONDSEC_INVALID_KEYWORD:
503             errmsg =
504                 "conditional section INCLUDE or IGNORE keyword expected";
505             break;
506         case XML_ERR_CONDSEC_NOT_FINISHED:
507             errmsg = "XML conditional section not closed";
508             break;
509         case XML_ERR_XMLDECL_NOT_STARTED:
510             errmsg = "Text declaration '<?xml' required";
511             break;
512         case XML_ERR_XMLDECL_NOT_FINISHED:
513             errmsg = "parsing XML declaration: '?>' expected";
514             break;
515         case XML_ERR_EXT_ENTITY_STANDALONE:
516             errmsg = "external parsed entities cannot be standalone";
517             break;
518         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
519             errmsg = "EntityRef: expecting ';'";
520             break;
521         case XML_ERR_DOCTYPE_NOT_FINISHED:
522             errmsg = "DOCTYPE improperly terminated";
523             break;
524         case XML_ERR_LTSLASH_REQUIRED:
525             errmsg = "EndTag: '</' not found";
526             break;
527         case XML_ERR_EQUAL_REQUIRED:
528             errmsg = "expected '='";
529             break;
530         case XML_ERR_STRING_NOT_CLOSED:
531             errmsg = "String not closed expecting \" or '";
532             break;
533         case XML_ERR_STRING_NOT_STARTED:
534             errmsg = "String not started expecting ' or \"";
535             break;
536         case XML_ERR_ENCODING_NAME:
537             errmsg = "Invalid XML encoding name";
538             break;
539         case XML_ERR_STANDALONE_VALUE:
540             errmsg = "standalone accepts only 'yes' or 'no'";
541             break;
542         case XML_ERR_DOCUMENT_EMPTY:
543             errmsg = "Document is empty";
544             break;
545         case XML_ERR_DOCUMENT_END:
546             errmsg = "Extra content at the end of the document";
547             break;
548         case XML_ERR_NOT_WELL_BALANCED:
549             errmsg = "chunk is not well balanced";
550             break;
551         case XML_ERR_EXTRA_CONTENT:
552             errmsg = "extra content at the end of well balanced chunk";
553             break;
554         case XML_ERR_VERSION_MISSING:
555             errmsg = "Malformed declaration expecting version";
556             break;
557         case XML_ERR_NAME_TOO_LONG:
558             errmsg = "Name too long use XML_PARSE_HUGE option";
559             break;
560 #if 0
561         case:
562             errmsg = "";
563             break;
564 #endif
565         default:
566             errmsg = "Unregistered error message";
567     }
568     if (ctxt != NULL)
569 	ctxt->errNo = error;
570     if (info == NULL) {
571         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
572                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
573                         errmsg);
574     } else {
575         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
576                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
577                         errmsg, info);
578     }
579     if (ctxt != NULL) {
580 	ctxt->wellFormed = 0;
581 	if (ctxt->recovery == 0)
582 	    ctxt->disableSAX = 1;
583     }
584 }
585 
586 /**
587  * xmlFatalErrMsg:
588  * @ctxt:  an XML parser context
589  * @error:  the error number
590  * @msg:  the error message
591  *
592  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593  */
594 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)595 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596                const char *msg)
597 {
598     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
599         (ctxt->instate == XML_PARSER_EOF))
600 	return;
601     if (ctxt != NULL)
602 	ctxt->errNo = error;
603     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
604                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
605     if (ctxt != NULL) {
606 	ctxt->wellFormed = 0;
607 	if (ctxt->recovery == 0)
608 	    ctxt->disableSAX = 1;
609     }
610 }
611 
612 /**
613  * xmlWarningMsg:
614  * @ctxt:  an XML parser context
615  * @error:  the error number
616  * @msg:  the error message
617  * @str1:  extra data
618  * @str2:  extra data
619  *
620  * Handle a warning.
621  */
622 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)623 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624               const char *msg, const xmlChar *str1, const xmlChar *str2)
625 {
626     xmlStructuredErrorFunc schannel = NULL;
627 
628     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629         (ctxt->instate == XML_PARSER_EOF))
630 	return;
631     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
632         (ctxt->sax->initialized == XML_SAX2_MAGIC))
633         schannel = ctxt->sax->serror;
634     if (ctxt != NULL) {
635         __xmlRaiseError(schannel,
636                     (ctxt->sax) ? ctxt->sax->warning : NULL,
637                     ctxt->userData,
638                     ctxt, NULL, XML_FROM_PARSER, error,
639                     XML_ERR_WARNING, NULL, 0,
640 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
641 		    msg, (const char *) str1, (const char *) str2);
642     } else {
643         __xmlRaiseError(schannel, NULL, NULL,
644                     ctxt, NULL, XML_FROM_PARSER, error,
645                     XML_ERR_WARNING, NULL, 0,
646 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
647 		    msg, (const char *) str1, (const char *) str2);
648     }
649 }
650 
651 /**
652  * xmlValidityError:
653  * @ctxt:  an XML parser context
654  * @error:  the error number
655  * @msg:  the error message
656  * @str1:  extra data
657  *
658  * Handle a validity error.
659  */
660 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)661 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
662               const char *msg, const xmlChar *str1, const xmlChar *str2)
663 {
664     xmlStructuredErrorFunc schannel = NULL;
665 
666     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667         (ctxt->instate == XML_PARSER_EOF))
668 	return;
669     if (ctxt != NULL) {
670 	ctxt->errNo = error;
671 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
672 	    schannel = ctxt->sax->serror;
673     }
674     if (ctxt != NULL) {
675         __xmlRaiseError(schannel,
676                     ctxt->vctxt.error, ctxt->vctxt.userData,
677                     ctxt, NULL, XML_FROM_DTD, error,
678                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
679 		    (const char *) str2, NULL, 0, 0,
680 		    msg, (const char *) str1, (const char *) str2);
681 	ctxt->valid = 0;
682     } else {
683         __xmlRaiseError(schannel, NULL, NULL,
684                     ctxt, NULL, XML_FROM_DTD, error,
685                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
686 		    (const char *) str2, NULL, 0, 0,
687 		    msg, (const char *) str1, (const char *) str2);
688     }
689 }
690 
691 /**
692  * xmlFatalErrMsgInt:
693  * @ctxt:  an XML parser context
694  * @error:  the error number
695  * @msg:  the error message
696  * @val:  an integer value
697  *
698  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
699  */
700 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)701 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
702                   const char *msg, int val)
703 {
704     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
705         (ctxt->instate == XML_PARSER_EOF))
706 	return;
707     if (ctxt != NULL)
708 	ctxt->errNo = error;
709     __xmlRaiseError(NULL, NULL, NULL,
710                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
711                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
712     if (ctxt != NULL) {
713 	ctxt->wellFormed = 0;
714 	if (ctxt->recovery == 0)
715 	    ctxt->disableSAX = 1;
716     }
717 }
718 
719 /**
720  * xmlFatalErrMsgStrIntStr:
721  * @ctxt:  an XML parser context
722  * @error:  the error number
723  * @msg:  the error message
724  * @str1:  an string info
725  * @val:  an integer value
726  * @str2:  an string info
727  *
728  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
729  */
730 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)731 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
732                   const char *msg, const xmlChar *str1, int val,
733 		  const xmlChar *str2)
734 {
735     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736         (ctxt->instate == XML_PARSER_EOF))
737 	return;
738     if (ctxt != NULL)
739 	ctxt->errNo = error;
740     __xmlRaiseError(NULL, NULL, NULL,
741                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
742                     NULL, 0, (const char *) str1, (const char *) str2,
743 		    NULL, val, 0, msg, str1, val, str2);
744     if (ctxt != NULL) {
745 	ctxt->wellFormed = 0;
746 	if (ctxt->recovery == 0)
747 	    ctxt->disableSAX = 1;
748     }
749 }
750 
751 /**
752  * xmlFatalErrMsgStr:
753  * @ctxt:  an XML parser context
754  * @error:  the error number
755  * @msg:  the error message
756  * @val:  a string value
757  *
758  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
759  */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762                   const char *msg, const xmlChar * val)
763 {
764     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765         (ctxt->instate == XML_PARSER_EOF))
766 	return;
767     if (ctxt != NULL)
768 	ctxt->errNo = error;
769     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770                     XML_FROM_PARSER, error, XML_ERR_FATAL,
771                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772                     val);
773     if (ctxt != NULL) {
774 	ctxt->wellFormed = 0;
775 	if (ctxt->recovery == 0)
776 	    ctxt->disableSAX = 1;
777     }
778 }
779 
780 /**
781  * xmlErrMsgStr:
782  * @ctxt:  an XML parser context
783  * @error:  the error number
784  * @msg:  the error message
785  * @val:  a string value
786  *
787  * Handle a non fatal parser error
788  */
789 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)790 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
791                   const char *msg, const xmlChar * val)
792 {
793     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
794         (ctxt->instate == XML_PARSER_EOF))
795 	return;
796     if (ctxt != NULL)
797 	ctxt->errNo = error;
798     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
799                     XML_FROM_PARSER, error, XML_ERR_ERROR,
800                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
801                     val);
802 }
803 
804 /**
805  * xmlNsErr:
806  * @ctxt:  an XML parser context
807  * @error:  the error number
808  * @msg:  the message
809  * @info1:  extra information string
810  * @info2:  extra information string
811  *
812  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
813  */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816          const char *msg,
817          const xmlChar * info1, const xmlChar * info2,
818          const xmlChar * info3)
819 {
820     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821         (ctxt->instate == XML_PARSER_EOF))
822 	return;
823     if (ctxt != NULL)
824 	ctxt->errNo = error;
825     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
826                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
827                     (const char *) info2, (const char *) info3, 0, 0, msg,
828                     info1, info2, info3);
829     if (ctxt != NULL)
830 	ctxt->nsWellFormed = 0;
831 }
832 
833 /**
834  * xmlNsWarn
835  * @ctxt:  an XML parser context
836  * @error:  the error number
837  * @msg:  the message
838  * @info1:  extra information string
839  * @info2:  extra information string
840  *
841  * Handle a namespace warning error
842  */
843 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)844 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
845          const char *msg,
846          const xmlChar * info1, const xmlChar * info2,
847          const xmlChar * info3)
848 {
849     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
850         (ctxt->instate == XML_PARSER_EOF))
851 	return;
852     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
853                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
854                     (const char *) info2, (const char *) info3, 0, 0, msg,
855                     info1, info2, info3);
856 }
857 
858 /************************************************************************
859  *									*
860  *		Library wide options					*
861  *									*
862  ************************************************************************/
863 
864 /**
865   * xmlHasFeature:
866   * @feature: the feature to be examined
867   *
868   * Examines if the library has been compiled with a given feature.
869   *
870   * Returns a non-zero value if the feature exist, otherwise zero.
871   * Returns zero (0) if the feature does not exist or an unknown
872   * unknown feature is requested, non-zero otherwise.
873   */
874 int
xmlHasFeature(xmlFeature feature)875 xmlHasFeature(xmlFeature feature)
876 {
877     switch (feature) {
878 	case XML_WITH_THREAD:
879 #ifdef LIBXML_THREAD_ENABLED
880 	    return(1);
881 #else
882 	    return(0);
883 #endif
884         case XML_WITH_TREE:
885 #ifdef LIBXML_TREE_ENABLED
886             return(1);
887 #else
888             return(0);
889 #endif
890         case XML_WITH_OUTPUT:
891 #ifdef LIBXML_OUTPUT_ENABLED
892             return(1);
893 #else
894             return(0);
895 #endif
896         case XML_WITH_PUSH:
897 #ifdef LIBXML_PUSH_ENABLED
898             return(1);
899 #else
900             return(0);
901 #endif
902         case XML_WITH_READER:
903 #ifdef LIBXML_READER_ENABLED
904             return(1);
905 #else
906             return(0);
907 #endif
908         case XML_WITH_PATTERN:
909 #ifdef LIBXML_PATTERN_ENABLED
910             return(1);
911 #else
912             return(0);
913 #endif
914         case XML_WITH_WRITER:
915 #ifdef LIBXML_WRITER_ENABLED
916             return(1);
917 #else
918             return(0);
919 #endif
920         case XML_WITH_SAX1:
921 #ifdef LIBXML_SAX1_ENABLED
922             return(1);
923 #else
924             return(0);
925 #endif
926         case XML_WITH_FTP:
927 #ifdef LIBXML_FTP_ENABLED
928             return(1);
929 #else
930             return(0);
931 #endif
932         case XML_WITH_HTTP:
933 #ifdef LIBXML_HTTP_ENABLED
934             return(1);
935 #else
936             return(0);
937 #endif
938         case XML_WITH_VALID:
939 #ifdef LIBXML_VALID_ENABLED
940             return(1);
941 #else
942             return(0);
943 #endif
944         case XML_WITH_HTML:
945 #ifdef LIBXML_HTML_ENABLED
946             return(1);
947 #else
948             return(0);
949 #endif
950         case XML_WITH_LEGACY:
951 #ifdef LIBXML_LEGACY_ENABLED
952             return(1);
953 #else
954             return(0);
955 #endif
956         case XML_WITH_C14N:
957 #ifdef LIBXML_C14N_ENABLED
958             return(1);
959 #else
960             return(0);
961 #endif
962         case XML_WITH_CATALOG:
963 #ifdef LIBXML_CATALOG_ENABLED
964             return(1);
965 #else
966             return(0);
967 #endif
968         case XML_WITH_XPATH:
969 #ifdef LIBXML_XPATH_ENABLED
970             return(1);
971 #else
972             return(0);
973 #endif
974         case XML_WITH_XPTR:
975 #ifdef LIBXML_XPTR_ENABLED
976             return(1);
977 #else
978             return(0);
979 #endif
980         case XML_WITH_XINCLUDE:
981 #ifdef LIBXML_XINCLUDE_ENABLED
982             return(1);
983 #else
984             return(0);
985 #endif
986         case XML_WITH_ICONV:
987 #ifdef LIBXML_ICONV_ENABLED
988             return(1);
989 #else
990             return(0);
991 #endif
992         case XML_WITH_ISO8859X:
993 #ifdef LIBXML_ISO8859X_ENABLED
994             return(1);
995 #else
996             return(0);
997 #endif
998         case XML_WITH_UNICODE:
999 #ifdef LIBXML_UNICODE_ENABLED
1000             return(1);
1001 #else
1002             return(0);
1003 #endif
1004         case XML_WITH_REGEXP:
1005 #ifdef LIBXML_REGEXP_ENABLED
1006             return(1);
1007 #else
1008             return(0);
1009 #endif
1010         case XML_WITH_AUTOMATA:
1011 #ifdef LIBXML_AUTOMATA_ENABLED
1012             return(1);
1013 #else
1014             return(0);
1015 #endif
1016         case XML_WITH_EXPR:
1017 #ifdef LIBXML_EXPR_ENABLED
1018             return(1);
1019 #else
1020             return(0);
1021 #endif
1022         case XML_WITH_SCHEMAS:
1023 #ifdef LIBXML_SCHEMAS_ENABLED
1024             return(1);
1025 #else
1026             return(0);
1027 #endif
1028         case XML_WITH_SCHEMATRON:
1029 #ifdef LIBXML_SCHEMATRON_ENABLED
1030             return(1);
1031 #else
1032             return(0);
1033 #endif
1034         case XML_WITH_MODULES:
1035 #ifdef LIBXML_MODULES_ENABLED
1036             return(1);
1037 #else
1038             return(0);
1039 #endif
1040         case XML_WITH_DEBUG:
1041 #ifdef LIBXML_DEBUG_ENABLED
1042             return(1);
1043 #else
1044             return(0);
1045 #endif
1046         case XML_WITH_DEBUG_MEM:
1047 #ifdef DEBUG_MEMORY_LOCATION
1048             return(1);
1049 #else
1050             return(0);
1051 #endif
1052         case XML_WITH_DEBUG_RUN:
1053 #ifdef LIBXML_DEBUG_RUNTIME
1054             return(1);
1055 #else
1056             return(0);
1057 #endif
1058         case XML_WITH_ZLIB:
1059 #ifdef LIBXML_ZLIB_ENABLED
1060             return(1);
1061 #else
1062             return(0);
1063 #endif
1064         case XML_WITH_LZMA:
1065 #ifdef LIBXML_LZMA_ENABLED
1066             return(1);
1067 #else
1068             return(0);
1069 #endif
1070         case XML_WITH_ICU:
1071 #ifdef LIBXML_ICU_ENABLED
1072             return(1);
1073 #else
1074             return(0);
1075 #endif
1076         default:
1077 	    break;
1078      }
1079      return(0);
1080 }
1081 
1082 /************************************************************************
1083  *									*
1084  *		SAX2 defaulted attributes handling			*
1085  *									*
1086  ************************************************************************/
1087 
1088 /**
1089  * xmlDetectSAX2:
1090  * @ctxt:  an XML parser context
1091  *
1092  * Do the SAX2 detection and specific intialization
1093  */
1094 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1095 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1096     if (ctxt == NULL) return;
1097 #ifdef LIBXML_SAX1_ENABLED
1098     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1099         ((ctxt->sax->startElementNs != NULL) ||
1100          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1101 #else
1102     ctxt->sax2 = 1;
1103 #endif /* LIBXML_SAX1_ENABLED */
1104 
1105     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1106     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1107     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1108     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1109 		(ctxt->str_xml_ns == NULL)) {
1110         xmlErrMemory(ctxt, NULL);
1111     }
1112 }
1113 
1114 typedef struct _xmlDefAttrs xmlDefAttrs;
1115 typedef xmlDefAttrs *xmlDefAttrsPtr;
1116 struct _xmlDefAttrs {
1117     int nbAttrs;	/* number of defaulted attributes on that element */
1118     int maxAttrs;       /* the size of the array */
1119 #if __STDC_VERSION__ >= 199901L && !defined __HP_cc
1120     /* Using a C99 flexible array member avoids UBSan errors. */
1121     const xmlChar *values[]; /* array of localname/prefix/values/external */
1122 #else
1123     const xmlChar *values[5];
1124 #endif
1125 };
1126 
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve dst (and it doesn't come from a dictionary or
 * read-only memory) then passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 *         if an argument is NULL or if, when normalizing in place, no
 *         character had to be removed.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *start = dst;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; emit one space unless it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *dst++ = 0x20;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = 0;
    /*
     * In-place use: the write cursor only catches up with the read
     * cursor when nothing was dropped, i.e. no conversion was needed.
     */
    if (dst == src)
        return(NULL);
    /* return the beginning of the result, not the advanced write cursor */
    return(start);
}
1165 
1166 /**
1167  * xmlAttrNormalizeSpace2:
1168  * @src: the source string
1169  *
1170  * Normalize the space in non CDATA attribute values, a slightly more complex
1171  * front end to avoid allocation problems when running on attribute values
1172  * coming from the input.
1173  *
1174  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1175  *         is needed.
1176  */
1177 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1178 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1179 {
1180     int i;
1181     int remove_head = 0;
1182     int need_realloc = 0;
1183     const xmlChar *cur;
1184 
1185     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1186         return(NULL);
1187     i = *len;
1188     if (i <= 0)
1189         return(NULL);
1190 
1191     cur = src;
1192     while (*cur == 0x20) {
1193         cur++;
1194 	remove_head++;
1195     }
1196     while (*cur != 0) {
1197 	if (*cur == 0x20) {
1198 	    cur++;
1199 	    if ((*cur == 0x20) || (*cur == 0)) {
1200 	        need_realloc = 1;
1201 		break;
1202 	    }
1203 	} else
1204 	    cur++;
1205     }
1206     if (need_realloc) {
1207         xmlChar *ret;
1208 
1209 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1210 	if (ret == NULL) {
1211 	    xmlErrMemory(ctxt, NULL);
1212 	    return(NULL);
1213 	}
1214 	xmlAttrNormalizeSpace(ret, ret);
1215 	*len = (int) strlen((const char *)ret);
1216         return(ret);
1217     } else if (remove_head) {
1218         *len -= remove_head;
1219         memmove(src, src + remove_head, 1 + *len);
1220 	return(src);
1221     }
1222     return(NULL);
1223 }
1224 
1225 /**
1226  * xmlAddDefAttrs:
1227  * @ctxt:  an XML parser context
1228  * @fullname:  the element fullname
1229  * @fullattr:  the attribute fullname
1230  * @value:  the attribute value
1231  *
1232  * Add a defaulted attribute for an element
1233  */
1234 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1235 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1236                const xmlChar *fullname,
1237                const xmlChar *fullattr,
1238                const xmlChar *value) {
1239     xmlDefAttrsPtr defaults;
1240     int len;
1241     const xmlChar *name;
1242     const xmlChar *prefix;
1243 
1244     /*
1245      * Allows to detect attribute redefinitions
1246      */
1247     if (ctxt->attsSpecial != NULL) {
1248         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1249 	    return;
1250     }
1251 
1252     if (ctxt->attsDefault == NULL) {
1253         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1254 	if (ctxt->attsDefault == NULL)
1255 	    goto mem_error;
1256     }
1257 
1258     /*
1259      * split the element name into prefix:localname , the string found
1260      * are within the DTD and then not associated to namespace names.
1261      */
1262     name = xmlSplitQName3(fullname, &len);
1263     if (name == NULL) {
1264         name = xmlDictLookup(ctxt->dict, fullname, -1);
1265 	prefix = NULL;
1266     } else {
1267         name = xmlDictLookup(ctxt->dict, name, -1);
1268 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1269     }
1270 
1271     /*
1272      * make sure there is some storage
1273      */
1274     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1275     if (defaults == NULL) {
1276         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1277 	                   (4 * 5) * sizeof(const xmlChar *));
1278 	if (defaults == NULL)
1279 	    goto mem_error;
1280 	defaults->nbAttrs = 0;
1281 	defaults->maxAttrs = 4;
1282 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1283 	                        defaults, NULL) < 0) {
1284 	    xmlFree(defaults);
1285 	    goto mem_error;
1286 	}
1287     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1288         xmlDefAttrsPtr temp;
1289 
1290         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1291 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1292 	if (temp == NULL)
1293 	    goto mem_error;
1294 	defaults = temp;
1295 	defaults->maxAttrs *= 2;
1296 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 	                        defaults, NULL) < 0) {
1298 	    xmlFree(defaults);
1299 	    goto mem_error;
1300 	}
1301     }
1302 
1303     /*
1304      * Split the element name into prefix:localname , the string found
1305      * are within the DTD and hen not associated to namespace names.
1306      */
1307     name = xmlSplitQName3(fullattr, &len);
1308     if (name == NULL) {
1309         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1310 	prefix = NULL;
1311     } else {
1312         name = xmlDictLookup(ctxt->dict, name, -1);
1313 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1314     }
1315 
1316     defaults->values[5 * defaults->nbAttrs] = name;
1317     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1318     /* intern the string and precompute the end */
1319     len = xmlStrlen(value);
1320     value = xmlDictLookup(ctxt->dict, value, len);
1321     defaults->values[5 * defaults->nbAttrs + 2] = value;
1322     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1323     if (ctxt->external)
1324         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1325     else
1326         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1327     defaults->nbAttrs++;
1328 
1329     return;
1330 
1331 mem_error:
1332     xmlErrMemory(ctxt, NULL);
1333     return;
1334 }
1335 
1336 /**
1337  * xmlAddSpecialAttr:
1338  * @ctxt:  an XML parser context
1339  * @fullname:  the element fullname
1340  * @fullattr:  the attribute fullname
1341  * @type:  the attribute type
1342  *
1343  * Register this attribute type
1344  */
1345 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1346 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1347 		  const xmlChar *fullname,
1348 		  const xmlChar *fullattr,
1349 		  int type)
1350 {
1351     if (ctxt->attsSpecial == NULL) {
1352         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1353 	if (ctxt->attsSpecial == NULL)
1354 	    goto mem_error;
1355     }
1356 
1357     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1358         return;
1359 
1360     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1361                      (void *) (ptrdiff_t) type);
1362     return;
1363 
1364 mem_error:
1365     xmlErrMemory(ctxt, NULL);
1366     return;
1367 }
1368 
1369 /**
1370  * xmlCleanSpecialAttrCallback:
1371  *
1372  * Removes CDATA attributes from the special attribute table
1373  */
1374 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1375 xmlCleanSpecialAttrCallback(void *payload, void *data,
1376                             const xmlChar *fullname, const xmlChar *fullattr,
1377                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1378     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1379 
1380     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1381         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1382     }
1383 }
1384 
1385 /**
1386  * xmlCleanSpecialAttr:
1387  * @ctxt:  an XML parser context
1388  *
1389  * Trim the list of attributes defined to remove all those of type
1390  * CDATA as they are not special. This call should be done when finishing
1391  * to parse the DTD and before starting to parse the document root.
1392  */
1393 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1394 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1395 {
1396     if (ctxt->attsSpecial == NULL)
1397         return;
1398 
1399     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1400 
1401     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1402         xmlHashFree(ctxt->attsSpecial, NULL);
1403         ctxt->attsSpecial = NULL;
1404     }
1405     return;
1406 }
1407 
/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow irregular i-xxx IANA and x-xxx user
 * codes too; as required by productions [36] and [37] they must carry
 * at least one letter after the dash.
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway: anything
 * following a variant and a further '-' is accepted unchecked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        nxt = cur;
        while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
               ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
            nxt++;
        /* a bare "i-" or "x-" carries no code at all */
        if (nxt == cur)
            return(0);
        return(nxt[0] == 0);
    }
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved (4 letters) or registered (5 to 8 letters) language
         * subtag: accepted only when it makes up the whole value
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * nxt[0] is already known to be a digit; the && chain stops at the
     * first non-digit so the terminator is never read past
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1601 
1602 /************************************************************************
1603  *									*
1604  *		Parser stacks related functions and macros		*
1605  *									*
1606  ************************************************************************/
1607 
1608 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1609                                             const xmlChar ** str);
1610 
1611 #ifdef SAX2
1612 /**
1613  * nsPush:
1614  * @ctxt:  an XML parser context
1615  * @prefix:  the namespace prefix or NULL
1616  * @URL:  the namespace name
1617  *
1618  * Pushes a new parser namespace on top of the ns stack
1619  *
1620  * Returns -1 in case of error, -2 if the namespace should be discarded
1621  *	   and the index in the stack otherwise.
1622  */
1623 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1624 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1625 {
1626     if (ctxt->options & XML_PARSE_NSCLEAN) {
1627         int i;
1628 	for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1629 	    if (ctxt->nsTab[i] == prefix) {
1630 		/* in scope */
1631 	        if (ctxt->nsTab[i + 1] == URL)
1632 		    return(-2);
1633 		/* out of scope keep it */
1634 		break;
1635 	    }
1636 	}
1637     }
1638     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1639 	ctxt->nsMax = 10;
1640 	ctxt->nsNr = 0;
1641 	ctxt->nsTab = (const xmlChar **)
1642 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1643 	if (ctxt->nsTab == NULL) {
1644 	    xmlErrMemory(ctxt, NULL);
1645 	    ctxt->nsMax = 0;
1646             return (-1);
1647 	}
1648     } else if (ctxt->nsNr >= ctxt->nsMax) {
1649         const xmlChar ** tmp;
1650         ctxt->nsMax *= 2;
1651         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1652 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1653         if (tmp == NULL) {
1654             xmlErrMemory(ctxt, NULL);
1655 	    ctxt->nsMax /= 2;
1656             return (-1);
1657         }
1658 	ctxt->nsTab = tmp;
1659     }
1660     ctxt->nsTab[ctxt->nsNr++] = prefix;
1661     ctxt->nsTab[ctxt->nsNr++] = URL;
1662     return (ctxt->nsNr);
1663 }
1664 /**
1665  * nsPop:
1666  * @ctxt: an XML parser context
1667  * @nr:  the number to pop
1668  *
1669  * Pops the top @nr parser prefix/namespace from the ns stack
1670  *
1671  * Returns the number of namespaces removed
1672  */
1673 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1674 nsPop(xmlParserCtxtPtr ctxt, int nr)
1675 {
1676     int i;
1677 
1678     if (ctxt->nsTab == NULL) return(0);
1679     if (ctxt->nsNr < nr) {
1680         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1681         nr = ctxt->nsNr;
1682     }
1683     if (ctxt->nsNr <= 0)
1684         return (0);
1685 
1686     for (i = 0;i < nr;i++) {
1687          ctxt->nsNr--;
1688 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1689     }
1690     return(nr);
1691 }
1692 #endif
1693 
1694 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1695 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1696     const xmlChar **atts;
1697     int *attallocs;
1698     int maxatts;
1699 
1700     if (ctxt->atts == NULL) {
1701 	maxatts = 55; /* allow for 10 attrs by default */
1702 	atts = (const xmlChar **)
1703 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1704 	if (atts == NULL) goto mem_error;
1705 	ctxt->atts = atts;
1706 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1707 	if (attallocs == NULL) goto mem_error;
1708 	ctxt->attallocs = attallocs;
1709 	ctxt->maxatts = maxatts;
1710     } else if (nr + 5 > ctxt->maxatts) {
1711 	maxatts = (nr + 5) * 2;
1712 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1713 				     maxatts * sizeof(const xmlChar *));
1714 	if (atts == NULL) goto mem_error;
1715 	ctxt->atts = atts;
1716 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1717 	                             (maxatts / 5) * sizeof(int));
1718 	if (attallocs == NULL) goto mem_error;
1719 	ctxt->attallocs = attallocs;
1720 	ctxt->maxatts = maxatts;
1721     }
1722     return(ctxt->maxatts);
1723 mem_error:
1724     xmlErrMemory(ctxt, NULL);
1725     return(-1);
1726 }
1727 
1728 /**
1729  * inputPush:
1730  * @ctxt:  an XML parser context
1731  * @value:  the parser input
1732  *
1733  * Pushes a new parser input on top of the input stack
1734  *
1735  * Returns -1 in case of error, the index in the stack otherwise
1736  */
1737 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1738 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1739 {
1740     if ((ctxt == NULL) || (value == NULL))
1741         return(-1);
1742     if (ctxt->inputNr >= ctxt->inputMax) {
1743         ctxt->inputMax *= 2;
1744         ctxt->inputTab =
1745             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1746                                              ctxt->inputMax *
1747                                              sizeof(ctxt->inputTab[0]));
1748         if (ctxt->inputTab == NULL) {
1749             xmlErrMemory(ctxt, NULL);
1750 	    xmlFreeInputStream(value);
1751 	    ctxt->inputMax /= 2;
1752 	    value = NULL;
1753             return (-1);
1754         }
1755     }
1756     ctxt->inputTab[ctxt->inputNr] = value;
1757     ctxt->input = value;
1758     return (ctxt->inputNr++);
1759 }
1760 /**
1761  * inputPop:
1762  * @ctxt: an XML parser context
1763  *
1764  * Pops the top parser input from the input stack
1765  *
1766  * Returns the input just removed
1767  */
1768 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1769 inputPop(xmlParserCtxtPtr ctxt)
1770 {
1771     xmlParserInputPtr ret;
1772 
1773     if (ctxt == NULL)
1774         return(NULL);
1775     if (ctxt->inputNr <= 0)
1776         return (NULL);
1777     ctxt->inputNr--;
1778     if (ctxt->inputNr > 0)
1779         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1780     else
1781         ctxt->input = NULL;
1782     ret = ctxt->inputTab[ctxt->inputNr];
1783     ctxt->inputTab[ctxt->inputNr] = NULL;
1784     return (ret);
1785 }
1786 /**
1787  * nodePush:
1788  * @ctxt:  an XML parser context
1789  * @value:  the element node
1790  *
1791  * Pushes a new element node on top of the node stack
1792  *
1793  * Returns -1 in case of error, the index in the stack otherwise
1794  */
1795 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1796 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1797 {
1798     if (ctxt == NULL) return(0);
1799     if (ctxt->nodeNr >= ctxt->nodeMax) {
1800         xmlNodePtr *tmp;
1801 
1802 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1803                                       ctxt->nodeMax * 2 *
1804                                       sizeof(ctxt->nodeTab[0]));
1805         if (tmp == NULL) {
1806             xmlErrMemory(ctxt, NULL);
1807             return (-1);
1808         }
1809         ctxt->nodeTab = tmp;
1810 	ctxt->nodeMax *= 2;
1811     }
1812     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1813         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1814 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1815 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1816 			  xmlParserMaxDepth);
1817 	xmlHaltParser(ctxt);
1818 	return(-1);
1819     }
1820     ctxt->nodeTab[ctxt->nodeNr] = value;
1821     ctxt->node = value;
1822     return (ctxt->nodeNr++);
1823 }
1824 
1825 /**
1826  * nodePop:
1827  * @ctxt: an XML parser context
1828  *
1829  * Pops the top element node from the node stack
1830  *
1831  * Returns the node just removed
1832  */
1833 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1834 nodePop(xmlParserCtxtPtr ctxt)
1835 {
1836     xmlNodePtr ret;
1837 
1838     if (ctxt == NULL) return(NULL);
1839     if (ctxt->nodeNr <= 0)
1840         return (NULL);
1841     ctxt->nodeNr--;
1842     if (ctxt->nodeNr > 0)
1843         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1844     else
1845         ctxt->node = NULL;
1846     ret = ctxt->nodeTab[ctxt->nodeNr];
1847     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1848     return (ret);
1849 }
1850 
1851 #ifdef LIBXML_PUSH_ENABLED
1852 /**
1853  * nameNsPush:
1854  * @ctxt:  an XML parser context
1855  * @value:  the element name
1856  * @prefix:  the element prefix
1857  * @URI:  the element namespace name
1858  *
1859  * Pushes a new element name/prefix/URL on top of the name stack
1860  *
1861  * Returns -1 in case of error, the index in the stack otherwise
1862  */
1863 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1864 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1865            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1866 {
1867     if (ctxt->nameNr >= ctxt->nameMax) {
1868         const xmlChar * *tmp;
1869         void **tmp2;
1870         ctxt->nameMax *= 2;
1871         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1872                                     ctxt->nameMax *
1873                                     sizeof(ctxt->nameTab[0]));
1874         if (tmp == NULL) {
1875 	    ctxt->nameMax /= 2;
1876 	    goto mem_error;
1877         }
1878 	ctxt->nameTab = tmp;
1879         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1880                                     ctxt->nameMax * 3 *
1881                                     sizeof(ctxt->pushTab[0]));
1882         if (tmp2 == NULL) {
1883 	    ctxt->nameMax /= 2;
1884 	    goto mem_error;
1885         }
1886 	ctxt->pushTab = tmp2;
1887     }
1888     ctxt->nameTab[ctxt->nameNr] = value;
1889     ctxt->name = value;
1890     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1891     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1892     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1893     return (ctxt->nameNr++);
1894 mem_error:
1895     xmlErrMemory(ctxt, NULL);
1896     return (-1);
1897 }
1898 /**
1899  * nameNsPop:
1900  * @ctxt: an XML parser context
1901  *
1902  * Pops the top element/prefix/URI name from the name stack
1903  *
1904  * Returns the name just removed
1905  */
1906 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1907 nameNsPop(xmlParserCtxtPtr ctxt)
1908 {
1909     const xmlChar *ret;
1910 
1911     if (ctxt->nameNr <= 0)
1912         return (NULL);
1913     ctxt->nameNr--;
1914     if (ctxt->nameNr > 0)
1915         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1916     else
1917         ctxt->name = NULL;
1918     ret = ctxt->nameTab[ctxt->nameNr];
1919     ctxt->nameTab[ctxt->nameNr] = NULL;
1920     return (ret);
1921 }
1922 #endif /* LIBXML_PUSH_ENABLED */
1923 
1924 /**
1925  * namePush:
1926  * @ctxt:  an XML parser context
1927  * @value:  the element name
1928  *
1929  * Pushes a new element name on top of the name stack
1930  *
1931  * Returns -1 in case of error, the index in the stack otherwise
1932  */
1933 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1934 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1935 {
1936     if (ctxt == NULL) return (-1);
1937 
1938     if (ctxt->nameNr >= ctxt->nameMax) {
1939         const xmlChar * *tmp;
1940         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1941                                     ctxt->nameMax * 2 *
1942                                     sizeof(ctxt->nameTab[0]));
1943         if (tmp == NULL) {
1944 	    goto mem_error;
1945         }
1946 	ctxt->nameTab = tmp;
1947         ctxt->nameMax *= 2;
1948     }
1949     ctxt->nameTab[ctxt->nameNr] = value;
1950     ctxt->name = value;
1951     return (ctxt->nameNr++);
1952 mem_error:
1953     xmlErrMemory(ctxt, NULL);
1954     return (-1);
1955 }
1956 /**
1957  * namePop:
1958  * @ctxt: an XML parser context
1959  *
1960  * Pops the top element name from the name stack
1961  *
1962  * Returns the name just removed
1963  */
1964 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1965 namePop(xmlParserCtxtPtr ctxt)
1966 {
1967     const xmlChar *ret;
1968 
1969     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1970         return (NULL);
1971     ctxt->nameNr--;
1972     if (ctxt->nameNr > 0)
1973         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1974     else
1975         ctxt->name = NULL;
1976     ret = ctxt->nameTab[ctxt->nameNr];
1977     ctxt->nameTab[ctxt->nameNr] = NULL;
1978     return (ret);
1979 }
1980 
/*
 * spacePush:
 * Pushes @val on the space stack, doubling the table when it is full,
 * and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the capacity change, the old table is still in place */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1999 
/*
 * spacePop:
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to slot 0 when the stack becomes empty, and the vacated
 * slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2012 
2013 /*
2014  * Macros for accessing the content. Those should be used only by the parser,
2015  * and not exported.
2016  *
2017  * Dirty macros, i.e. one often need to make assumption on the context to
2018  * use them
2019  *
2020  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2021  *           To be used with extreme caution since operations consuming
2022  *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to
 *           compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2027  *   RAW     same as CUR but in the input buffer, bypass any token
2028  *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
2031  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2032  *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2035  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2036  *
2037  *   NEXT    Skip to the next character, this does the proper decoding
2038  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2039  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2040  *   CUR_CHAR(l) returns the current unicode character (int), set l
2041  *           to the number of xmlChars used for the encoding [0-5].
2042  *   CUR_SCHAR  same but operate on a string instead of the context
2043  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2044  *            the index
2045  *   GROW, SHRINK  handling of input buffers
2046  */
2047 
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 * Every expansion is fully parenthesized so it can be combined with any
 * surrounding operator; CUR_PTR and BASE_PTR remain usable as lvalues.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)
2053 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn. Bytes are read as unsigned char, so values above 0x7F
 * compare as 0x80..0xFF. No bound or terminator check is done: the caller
 * must make sure n bytes are readable at s. Note that s is expanded once
 * per compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2071 
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * character and column counters (the line counter is not touched), then
 * ask for more input if the new position holds a 0 byte.
 * val is expanded three times, so it must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2077 
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line / column counters right when newlines are crossed,
 * then ask for more input if the new position holds a 0 byte.
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every iteration and must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2090 
2091 #define SHRINK if ((ctxt->progressive == 0) &&				\
2092 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2093 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2094 	xmlSHRINK (ctxt);
2095 
xmlSHRINK(xmlParserCtxtPtr ctxt)2096 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2097     xmlParserInputShrink(ctxt->input);
2098     if (*ctxt->input->cur == 0)
2099         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2100 }
2101 
2102 #define GROW if ((ctxt->progressive == 0) &&				\
2103 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2104 	xmlGROW (ctxt);
2105 
xmlGROW(xmlParserCtxtPtr ctxt)2106 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2107     unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2108     unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2109 
2110     if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2111          (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2112          ((ctxt->input->buf) &&
2113           (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2114         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2115         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2116         xmlHaltParser(ctxt);
2117 	return;
2118     }
2119     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2120     if ((ctxt->input->cur > ctxt->input->end) ||
2121         (ctxt->input->cur < ctxt->input->base)) {
2122         xmlHaltParser(ctxt);
2123         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2124 	return;
2125     }
2126     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2127         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2128 }
2129 
/* Skip blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one column and one
 * character, then ask for more input if the new position holds a 0 byte.
 * The expansion is a brace block, use it as a statement of its own.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating the line / column counters. nbChars is not updated here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* Current character of the input / of string s; its length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. A character of length 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte().
 * The expansion is a bare if/else without a trailing semicolon: always
 * use it as "COPY_BUF(...);". b and i are expanded more than once.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2155 
2156 /**
2157  * xmlSkipBlankChars:
2158  * @ctxt:  the XML parser context
2159  *
2160  * skip all blanks character found at that point in the input streams.
2161  * It pops up finished entities in the process if allowable at that point.
2162  *
2163  * Returns the number of space chars skipped
2164  */
2165 
2166 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2167 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2168     int res = 0;
2169 
2170     /*
2171      * It's Okay to use CUR/NEXT here since all the blanks are on
2172      * the ASCII range.
2173      */
2174     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2175 	const xmlChar *cur;
2176 	/*
2177 	 * if we are in the document content, go really fast
2178 	 */
2179 	cur = ctxt->input->cur;
2180 	while (IS_BLANK_CH(*cur)) {
2181 	    if (*cur == '\n') {
2182 		ctxt->input->line++; ctxt->input->col = 1;
2183 	    } else {
2184 		ctxt->input->col++;
2185 	    }
2186 	    cur++;
2187 	    res++;
2188 	    if (*cur == 0) {
2189 		ctxt->input->cur = cur;
2190 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2191 		cur = ctxt->input->cur;
2192 	    }
2193 	}
2194 	ctxt->input->cur = cur;
2195     } else {
2196         int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2197 
2198 	while (1) {
2199             if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2200 		NEXT;
2201 	    } else if (CUR == '%') {
2202                 /*
2203                  * Need to handle support of entities branching here
2204                  */
2205 	        if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2206                     break;
2207 	        xmlParsePEReference(ctxt);
2208             } else if (CUR == 0) {
2209                 if (ctxt->inputNr <= 1)
2210                     break;
2211                 xmlPopInput(ctxt);
2212             } else {
2213                 break;
2214             }
2215 
2216             /*
2217              * Also increase the counter when entering or exiting a PERef.
2218              * The spec says: "When a parameter-entity reference is recognized
2219              * in the DTD and included, its replacement text MUST be enlarged
2220              * by the attachment of one leading and one following space (#x20)
2221              * character."
2222              */
2223 	    res++;
2224         }
2225     }
2226     return(res);
2227 }
2228 
2229 /************************************************************************
2230  *									*
2231  *		Commodity functions to handle entities			*
2232  *									*
2233  ************************************************************************/
2234 
2235 /**
2236  * xmlPopInput:
2237  * @ctxt:  an XML parser context
2238  *
2239  * xmlPopInput: the current input pointed by ctxt->input came to an end
2240  *          pop it and return the next char.
2241  *
2242  * Returns the current xmlChar in the parser context
2243  */
2244 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2245 xmlPopInput(xmlParserCtxtPtr ctxt) {
2246     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2247     if (xmlParserDebugEntities)
2248 	xmlGenericError(xmlGenericErrorContext,
2249 		"Popping input %d\n", ctxt->inputNr);
2250     if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2251         (ctxt->instate != XML_PARSER_EOF))
2252         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2253                     "Unfinished entity outside the DTD");
2254     xmlFreeInputStream(inputPop(ctxt));
2255     if (*ctxt->input->cur == 0)
2256         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2257     return(CUR);
2258 }
2259 
2260 /**
2261  * xmlPushInput:
2262  * @ctxt:  an XML parser context
2263  * @input:  an XML parser input fragment (entity, XML fragment ...).
2264  *
2265  * xmlPushInput: switch to a new input stream which is stacked on top
2266  *               of the previous one(s).
2267  * Returns -1 in case of error or the index in the input stack
2268  */
2269 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2270 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2271     int ret;
2272     if (input == NULL) return(-1);
2273 
2274     if (xmlParserDebugEntities) {
2275 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2276 	    xmlGenericError(xmlGenericErrorContext,
2277 		    "%s(%d): ", ctxt->input->filename,
2278 		    ctxt->input->line);
2279 	xmlGenericError(xmlGenericErrorContext,
2280 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2281     }
2282     if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2283         (ctxt->inputNr > 1024)) {
2284         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2285         while (ctxt->inputNr > 1)
2286             xmlFreeInputStream(inputPop(ctxt));
2287 	return(-1);
2288     }
2289     ret = inputPush(ctxt, input);
2290     if (ctxt->instate == XML_PARSER_EOF)
2291         return(-1);
2292     GROW;
2293     return(ret);
2294 }
2295 
2296 /**
2297  * xmlParseCharRef:
2298  * @ctxt:  an XML parser context
2299  *
2300  * parse Reference declarations
2301  *
2302  * [66] CharRef ::= '&#' [0-9]+ ';' |
2303  *                  '&#x' [0-9a-fA-F]+ ';'
2304  *
2305  * [ WFC: Legal Character ]
2306  * Characters referred to using character references must match the
2307  * production for Char.
2308  *
2309  * Returns the value parsed (as an int), 0 in case of error
2310  */
2311 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2312 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2313     unsigned int val = 0;
2314     int count = 0;
2315     unsigned int outofrange = 0;
2316 
2317     /*
2318      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2319      */
2320     if ((RAW == '&') && (NXT(1) == '#') &&
2321         (NXT(2) == 'x')) {
2322 	SKIP(3);
2323 	GROW;
2324 	while (RAW != ';') { /* loop blocked by count */
2325 	    if (count++ > 20) {
2326 		count = 0;
2327 		GROW;
2328                 if (ctxt->instate == XML_PARSER_EOF)
2329                     return(0);
2330 	    }
2331 	    if ((RAW >= '0') && (RAW <= '9'))
2332 	        val = val * 16 + (CUR - '0');
2333 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2334 	        val = val * 16 + (CUR - 'a') + 10;
2335 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2336 	        val = val * 16 + (CUR - 'A') + 10;
2337 	    else {
2338 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2339 		val = 0;
2340 		break;
2341 	    }
2342 	    if (val > 0x10FFFF)
2343 	        outofrange = val;
2344 
2345 	    NEXT;
2346 	    count++;
2347 	}
2348 	if (RAW == ';') {
2349 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 	    ctxt->input->col++;
2351 	    ctxt->nbChars ++;
2352 	    ctxt->input->cur++;
2353 	}
2354     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2355 	SKIP(2);
2356 	GROW;
2357 	while (RAW != ';') { /* loop blocked by count */
2358 	    if (count++ > 20) {
2359 		count = 0;
2360 		GROW;
2361                 if (ctxt->instate == XML_PARSER_EOF)
2362                     return(0);
2363 	    }
2364 	    if ((RAW >= '0') && (RAW <= '9'))
2365 	        val = val * 10 + (CUR - '0');
2366 	    else {
2367 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2368 		val = 0;
2369 		break;
2370 	    }
2371 	    if (val > 0x10FFFF)
2372 	        outofrange = val;
2373 
2374 	    NEXT;
2375 	    count++;
2376 	}
2377 	if (RAW == ';') {
2378 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2379 	    ctxt->input->col++;
2380 	    ctxt->nbChars ++;
2381 	    ctxt->input->cur++;
2382 	}
2383     } else {
2384         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2385     }
2386 
2387     /*
2388      * [ WFC: Legal Character ]
2389      * Characters referred to using character references must match the
2390      * production for Char.
2391      */
2392     if ((IS_CHAR(val) && (outofrange == 0))) {
2393         return(val);
2394     } else {
2395         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396                           "xmlParseCharRef: invalid xmlChar value %d\n",
2397 	                  val);
2398     }
2399     return(0);
2400 }
2401 
2402 /**
2403  * xmlParseStringCharRef:
2404  * @ctxt:  an XML parser context
2405  * @str:  a pointer to an index in the string
2406  *
2407  * parse Reference declarations, variant parsing from a string rather
2408  * than an an input flow.
2409  *
2410  * [66] CharRef ::= '&#' [0-9]+ ';' |
2411  *                  '&#x' [0-9a-fA-F]+ ';'
2412  *
2413  * [ WFC: Legal Character ]
2414  * Characters referred to using character references must match the
2415  * production for Char.
2416  *
2417  * Returns the value parsed (as an int), 0 in case of error, str will be
2418  *         updated to the current value of the index
2419  */
2420 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2421 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422     const xmlChar *ptr;
2423     xmlChar cur;
2424     unsigned int val = 0;
2425     unsigned int outofrange = 0;
2426 
2427     if ((str == NULL) || (*str == NULL)) return(0);
2428     ptr = *str;
2429     cur = *ptr;
2430     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2431 	ptr += 3;
2432 	cur = *ptr;
2433 	while (cur != ';') { /* Non input consuming loop */
2434 	    if ((cur >= '0') && (cur <= '9'))
2435 	        val = val * 16 + (cur - '0');
2436 	    else if ((cur >= 'a') && (cur <= 'f'))
2437 	        val = val * 16 + (cur - 'a') + 10;
2438 	    else if ((cur >= 'A') && (cur <= 'F'))
2439 	        val = val * 16 + (cur - 'A') + 10;
2440 	    else {
2441 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2442 		val = 0;
2443 		break;
2444 	    }
2445 	    if (val > 0x10FFFF)
2446 	        outofrange = val;
2447 
2448 	    ptr++;
2449 	    cur = *ptr;
2450 	}
2451 	if (cur == ';')
2452 	    ptr++;
2453     } else if  ((cur == '&') && (ptr[1] == '#')){
2454 	ptr += 2;
2455 	cur = *ptr;
2456 	while (cur != ';') { /* Non input consuming loops */
2457 	    if ((cur >= '0') && (cur <= '9'))
2458 	        val = val * 10 + (cur - '0');
2459 	    else {
2460 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2461 		val = 0;
2462 		break;
2463 	    }
2464 	    if (val > 0x10FFFF)
2465 	        outofrange = val;
2466 
2467 	    ptr++;
2468 	    cur = *ptr;
2469 	}
2470 	if (cur == ';')
2471 	    ptr++;
2472     } else {
2473 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2474 	return(0);
2475     }
2476     *str = ptr;
2477 
2478     /*
2479      * [ WFC: Legal Character ]
2480      * Characters referred to using character references must match the
2481      * production for Char.
2482      */
2483     if ((IS_CHAR(val) && (outofrange == 0))) {
2484         return(val);
2485     } else {
2486         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2487 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2488 			  val);
2489     }
2490     return(0);
2491 }
2492 
2493 /**
2494  * xmlParserHandlePEReference:
2495  * @ctxt:  the parser context
2496  *
2497  * [69] PEReference ::= '%' Name ';'
2498  *
2499  * [ WFC: No Recursion ]
2500  * A parsed entity must not contain a recursive
2501  * reference to itself, either directly or indirectly.
2502  *
2503  * [ WFC: Entity Declared ]
2504  * In a document without any DTD, a document with only an internal DTD
2505  * subset which contains no parameter entity references, or a document
2506  * with "standalone='yes'", ...  ... The declaration of a parameter
2507  * entity must precede any reference to it...
2508  *
2509  * [ VC: Entity Declared ]
2510  * In a document with an external subset or external parameter entities
2511  * with "standalone='no'", ...  ... The declaration of a parameter entity
2512  * must precede any reference to it...
2513  *
2514  * [ WFC: In DTD ]
2515  * Parameter-entity references may only appear in the DTD.
2516  * NOTE: misleading but this is handled.
2517  *
2518  * A PEReference may have been detected in the current input stream
2519  * the handling is done accordingly to
2520  *      http://www.w3.org/TR/REC-xml#entproc
2521  * i.e.
2522  *   - Included in literal in entity values
2523  *   - Included as Parameter Entity reference within DTDs
2524  */
2525 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2526 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2527     switch(ctxt->instate) {
2528 	case XML_PARSER_CDATA_SECTION:
2529 	    return;
2530         case XML_PARSER_COMMENT:
2531 	    return;
2532 	case XML_PARSER_START_TAG:
2533 	    return;
2534 	case XML_PARSER_END_TAG:
2535 	    return;
2536         case XML_PARSER_EOF:
2537 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2538 	    return;
2539         case XML_PARSER_PROLOG:
2540 	case XML_PARSER_START:
2541 	case XML_PARSER_MISC:
2542 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2543 	    return;
2544 	case XML_PARSER_ENTITY_DECL:
2545         case XML_PARSER_CONTENT:
2546         case XML_PARSER_ATTRIBUTE_VALUE:
2547         case XML_PARSER_PI:
2548 	case XML_PARSER_SYSTEM_LITERAL:
2549 	case XML_PARSER_PUBLIC_LITERAL:
2550 	    /* we just ignore it there */
2551 	    return;
2552         case XML_PARSER_EPILOG:
2553 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2554 	    return;
2555 	case XML_PARSER_ENTITY_VALUE:
2556 	    /*
2557 	     * NOTE: in the case of entity values, we don't do the
2558 	     *       substitution here since we need the literal
2559 	     *       entity value to be able to save the internal
2560 	     *       subset of the document.
2561 	     *       This will be handled by xmlStringDecodeEntities
2562 	     */
2563 	    return;
2564         case XML_PARSER_DTD:
2565 	    /*
2566 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2567 	     * In the internal DTD subset, parameter-entity references
2568 	     * can occur only where markup declarations can occur, not
2569 	     * within markup declarations.
2570 	     * In that case this is handled in xmlParseMarkupDecl
2571 	     */
2572 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2573 		return;
2574 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2575 		return;
2576             break;
2577         case XML_PARSER_IGNORE:
2578             return;
2579     }
2580 
2581     xmlParsePEReference(ctxt);
2582 }
2583 
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer must be a plain identifier: buffer##_size is the variable
 * holding its size and is expected to be a size_t.
 * n may be any expression, it is parenthesized in the expansion.
 * mem_error: is a label expected to handle memory allocation failures,
 * it is also the target when the new size wraps around. On failure the
 * buffer and its size are left unchanged.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2598 
2599 /**
2600  * xmlStringLenDecodeEntities:
2601  * @ctxt:  the parser context
2602  * @str:  the input string
2603  * @len: the string length
2604  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2605  * @end:  an end marker xmlChar, 0 if none
2606  * @end2:  an end marker xmlChar, 0 if none
2607  * @end3:  an end marker xmlChar, 0 if none
2608  *
2609  * Takes a entity string content and process to do the adequate substitutions.
2610  *
2611  * [67] Reference ::= EntityRef | CharRef
2612  *
2613  * [69] PEReference ::= '%' Name ';'
2614  *
2615  * Returns A newly allocated string with the substitution done. The caller
2616  *      must deallocate it !
2617  */
2618 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2619 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2620 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2621     xmlChar *buffer = NULL;
2622     size_t buffer_size = 0;
2623     size_t nbchars = 0;
2624 
2625     xmlChar *current = NULL;
2626     xmlChar *rep = NULL;
2627     const xmlChar *last;
2628     xmlEntityPtr ent;
2629     int c,l;
2630 
2631     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2632 	return(NULL);
2633     last = str + len;
2634 
2635     if (((ctxt->depth > 40) &&
2636          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2637 	(ctxt->depth > 1024)) {
2638 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2639 	return(NULL);
2640     }
2641 
2642     /*
2643      * allocate a translation buffer.
2644      */
2645     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2646     buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2647     if (buffer == NULL) goto mem_error;
2648 
2649     /*
2650      * OK loop until we reach one of the ending char or a size limit.
2651      * we are operating on already parsed values.
2652      */
2653     if (str < last)
2654 	c = CUR_SCHAR(str, l);
2655     else
2656         c = 0;
2657     while ((c != 0) && (c != end) && /* non input consuming loop */
2658 	   (c != end2) && (c != end3)) {
2659 
2660 	if (c == 0) break;
2661         if ((c == '&') && (str[1] == '#')) {
2662 	    int val = xmlParseStringCharRef(ctxt, &str);
2663 	    if (val == 0)
2664                 goto int_error;
2665 	    COPY_BUF(0,buffer,nbchars,val);
2666 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2667 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2668 	    }
2669 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2670 	    if (xmlParserDebugEntities)
2671 		xmlGenericError(xmlGenericErrorContext,
2672 			"String decoding Entity Reference: %.30s\n",
2673 			str);
2674 	    ent = xmlParseStringEntityRef(ctxt, &str);
2675 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2676 	    if (ent != NULL)
2677 	        ctxt->nbentities += ent->checked / 2;
2678 	    if ((ent != NULL) &&
2679 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2680 		if (ent->content != NULL) {
2681 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2682 		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2683 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2684 		    }
2685 		} else {
2686 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2687 			    "predefined entity has no content\n");
2688                     goto int_error;
2689 		}
2690 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2691 		ctxt->depth++;
2692 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2693 			                      0, 0, 0);
2694 		ctxt->depth--;
2695 		if (rep == NULL)
2696                     goto int_error;
2697 
2698                 current = rep;
2699                 while (*current != 0) { /* non input consuming loop */
2700                     buffer[nbchars++] = *current++;
2701                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2702                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2703                             goto int_error;
2704                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2705                     }
2706                 }
2707                 xmlFree(rep);
2708                 rep = NULL;
2709 	    } else if (ent != NULL) {
2710 		int i = xmlStrlen(ent->name);
2711 		const xmlChar *cur = ent->name;
2712 
2713 		buffer[nbchars++] = '&';
2714 		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2715 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2716 		}
2717 		for (;i > 0;i--)
2718 		    buffer[nbchars++] = *cur++;
2719 		buffer[nbchars++] = ';';
2720 	    }
2721 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2722 	    if (xmlParserDebugEntities)
2723 		xmlGenericError(xmlGenericErrorContext,
2724 			"String decoding PE Reference: %.30s\n", str);
2725 	    ent = xmlParseStringPEReference(ctxt, &str);
2726 	    xmlParserEntityCheck(ctxt, 0, ent, 0);
2727 	    if (ent != NULL)
2728 	        ctxt->nbentities += ent->checked / 2;
2729 	    if (ent != NULL) {
2730                 if (ent->content == NULL) {
2731 		    /*
2732 		     * Note: external parsed entities will not be loaded,
2733 		     * it is not required for a non-validating parser to
2734 		     * complete external PEreferences coming from the
2735 		     * internal subset
2736 		     */
2737 		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2738 			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2739 			(ctxt->validate != 0)) {
2740 			xmlLoadEntityContent(ctxt, ent);
2741 		    } else {
2742 			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2743 		  "not validating will not read content for PE entity %s\n",
2744 		                      ent->name, NULL);
2745 		    }
2746 		}
2747 		ctxt->depth++;
2748 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2749 			                      0, 0, 0);
2750 		ctxt->depth--;
2751 		if (rep == NULL)
2752                     goto int_error;
2753                 current = rep;
2754                 while (*current != 0) { /* non input consuming loop */
2755                     buffer[nbchars++] = *current++;
2756                     if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2757                         if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2758                             goto int_error;
2759                         growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2760                     }
2761                 }
2762                 xmlFree(rep);
2763                 rep = NULL;
2764 	    }
2765 	} else {
2766 	    COPY_BUF(l,buffer,nbchars,c);
2767 	    str += l;
2768 	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2769 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2770 	    }
2771 	}
2772 	if (str < last)
2773 	    c = CUR_SCHAR(str, l);
2774 	else
2775 	    c = 0;
2776     }
2777     buffer[nbchars] = 0;
2778     return(buffer);
2779 
2780 mem_error:
2781     xmlErrMemory(ctxt, NULL);
2782 int_error:
2783     if (rep != NULL)
2784         xmlFree(rep);
2785     if (buffer != NULL)
2786         xmlFree(buffer);
2787     return(NULL);
2788 }
2789 
2790 /**
2791  * xmlStringDecodeEntities:
2792  * @ctxt:  the parser context
2793  * @str:  the input string
2794  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2795  * @end:  an end marker xmlChar, 0 if none
2796  * @end2:  an end marker xmlChar, 0 if none
2797  * @end3:  an end marker xmlChar, 0 if none
2798  *
2799  * Takes a entity string content and process to do the adequate substitutions.
2800  *
2801  * [67] Reference ::= EntityRef | CharRef
2802  *
2803  * [69] PEReference ::= '%' Name ';'
2804  *
2805  * Returns A newly allocated string with the substitution done. The caller
2806  *      must deallocate it !
2807  */
2808 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2809 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2810 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2811     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2812     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2813            end, end2, end3));
2814 }
2815 
2816 /************************************************************************
2817  *									*
2818  *		Commodity functions, cleanup needed ?			*
2819  *									*
2820  ************************************************************************/
2821 
2822 /**
2823  * areBlanks:
2824  * @ctxt:  an XML parser context
2825  * @str:  a xmlChar *
2826  * @len:  the size of @str
2827  * @blank_chars: we know the chars are blanks
2828  *
2829  * Is this a sequence of blank chars that one can ignore ?
2830  *
2831  * Returns 1 if ignorable 0 otherwise.
2832  */
2833 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2834 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2835                      int blank_chars) {
2836     int i, ret;
2837     xmlNodePtr lastChild;
2838 
2839     /*
2840      * Don't spend time trying to differentiate them, the same callback is
2841      * used !
2842      */
2843     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2844 	return(0);
2845 
2846     /*
2847      * Check for xml:space value.
2848      */
2849     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2850         (*(ctxt->space) == -2))
2851 	return(0);
2852 
2853     /*
2854      * Check that the string is made of blanks
2855      */
2856     if (blank_chars == 0) {
2857 	for (i = 0;i < len;i++)
2858 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2859     }
2860 
2861     /*
2862      * Look if the element is mixed content in the DTD if available
2863      */
2864     if (ctxt->node == NULL) return(0);
2865     if (ctxt->myDoc != NULL) {
2866 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2867         if (ret == 0) return(1);
2868         if (ret == 1) return(0);
2869     }
2870 
2871     /*
2872      * Otherwise, heuristic :-\
2873      */
2874     if ((RAW != '<') && (RAW != 0xD)) return(0);
2875     if ((ctxt->node->children == NULL) &&
2876 	(RAW == '<') && (NXT(1) == '/')) return(0);
2877 
2878     lastChild = xmlGetLastChild(ctxt->node);
2879     if (lastChild == NULL) {
2880         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2881             (ctxt->node->content != NULL)) return(0);
2882     } else if (xmlNodeIsText(lastChild))
2883         return(0);
2884     else if ((ctxt->node->children != NULL) &&
2885              (xmlNodeIsText(ctxt->node->children)))
2886         return(0);
2887     return(1);
2888 }
2889 
2890 /************************************************************************
2891  *									*
2892  *		Extra stuff for namespace support			*
2893  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2894  *									*
2895  ************************************************************************/
2896 
2897 /**
2898  * xmlSplitQName:
2899  * @ctxt:  an XML parser context
2900  * @name:  an XML parser context
2901  * @prefix:  a xmlChar **
2902  *
2903  * parse an UTF8 encoded XML qualified name string
2904  *
2905  * [NS 5] QName ::= (Prefix ':')? LocalPart
2906  *
2907  * [NS 6] Prefix ::= NCName
2908  *
2909  * [NS 7] LocalPart ::= NCName
2910  *
2911  * Returns the local part, and prefix is updated
2912  *   to get the Prefix if any.
2913  */
2914 
2915 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2916 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2917     xmlChar buf[XML_MAX_NAMELEN + 5];
2918     xmlChar *buffer = NULL;
2919     int len = 0;
2920     int max = XML_MAX_NAMELEN;
2921     xmlChar *ret = NULL;
2922     const xmlChar *cur = name;
2923     int c;
2924 
2925     if (prefix == NULL) return(NULL);
2926     *prefix = NULL;
2927 
2928     if (cur == NULL) return(NULL);
2929 
2930 #ifndef XML_XML_NAMESPACE
2931     /* xml: prefix is not really a namespace */
2932     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2933         (cur[2] == 'l') && (cur[3] == ':'))
2934 	return(xmlStrdup(name));
2935 #endif
2936 
2937     /* nasty but well=formed */
2938     if (cur[0] == ':')
2939 	return(xmlStrdup(name));
2940 
2941     c = *cur++;
2942     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2943 	buf[len++] = c;
2944 	c = *cur++;
2945     }
2946     if (len >= max) {
2947 	/*
2948 	 * Okay someone managed to make a huge name, so he's ready to pay
2949 	 * for the processing speed.
2950 	 */
2951 	max = len * 2;
2952 
2953 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2954 	if (buffer == NULL) {
2955 	    xmlErrMemory(ctxt, NULL);
2956 	    return(NULL);
2957 	}
2958 	memcpy(buffer, buf, len);
2959 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2960 	    if (len + 10 > max) {
2961 	        xmlChar *tmp;
2962 
2963 		max *= 2;
2964 		tmp = (xmlChar *) xmlRealloc(buffer,
2965 						max * sizeof(xmlChar));
2966 		if (tmp == NULL) {
2967 		    xmlFree(buffer);
2968 		    xmlErrMemory(ctxt, NULL);
2969 		    return(NULL);
2970 		}
2971 		buffer = tmp;
2972 	    }
2973 	    buffer[len++] = c;
2974 	    c = *cur++;
2975 	}
2976 	buffer[len] = 0;
2977     }
2978 
2979     if ((c == ':') && (*cur == 0)) {
2980         if (buffer != NULL)
2981 	    xmlFree(buffer);
2982 	*prefix = NULL;
2983 	return(xmlStrdup(name));
2984     }
2985 
2986     if (buffer == NULL)
2987 	ret = xmlStrndup(buf, len);
2988     else {
2989 	ret = buffer;
2990 	buffer = NULL;
2991 	max = XML_MAX_NAMELEN;
2992     }
2993 
2994 
2995     if (c == ':') {
2996 	c = *cur;
2997         *prefix = ret;
2998 	if (c == 0) {
2999 	    return(xmlStrndup(BAD_CAST "", 0));
3000 	}
3001 	len = 0;
3002 
3003 	/*
3004 	 * Check that the first character is proper to start
3005 	 * a new name
3006 	 */
3007 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3008 	      ((c >= 0x41) && (c <= 0x5A)) ||
3009 	      (c == '_') || (c == ':'))) {
3010 	    int l;
3011 	    int first = CUR_SCHAR(cur, l);
3012 
3013 	    if (!IS_LETTER(first) && (first != '_')) {
3014 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3015 			    "Name %s is not XML Namespace compliant\n",
3016 				  name);
3017 	    }
3018 	}
3019 	cur++;
3020 
3021 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3022 	    buf[len++] = c;
3023 	    c = *cur++;
3024 	}
3025 	if (len >= max) {
3026 	    /*
3027 	     * Okay someone managed to make a huge name, so he's ready to pay
3028 	     * for the processing speed.
3029 	     */
3030 	    max = len * 2;
3031 
3032 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3033 	    if (buffer == NULL) {
3034 	        xmlErrMemory(ctxt, NULL);
3035 		return(NULL);
3036 	    }
3037 	    memcpy(buffer, buf, len);
3038 	    while (c != 0) { /* tested bigname2.xml */
3039 		if (len + 10 > max) {
3040 		    xmlChar *tmp;
3041 
3042 		    max *= 2;
3043 		    tmp = (xmlChar *) xmlRealloc(buffer,
3044 						    max * sizeof(xmlChar));
3045 		    if (tmp == NULL) {
3046 			xmlErrMemory(ctxt, NULL);
3047 			xmlFree(buffer);
3048 			return(NULL);
3049 		    }
3050 		    buffer = tmp;
3051 		}
3052 		buffer[len++] = c;
3053 		c = *cur++;
3054 	    }
3055 	    buffer[len] = 0;
3056 	}
3057 
3058 	if (buffer == NULL)
3059 	    ret = xmlStrndup(buf, len);
3060 	else {
3061 	    ret = buffer;
3062 	}
3063     }
3064 
3065     return(ret);
3066 }
3067 
3068 /************************************************************************
3069  *									*
3070  *			The parser itself				*
3071  *	Relates to http://www.w3.org/TR/REC-xml				*
3072  *									*
3073  ************************************************************************/
3074 
3075 /************************************************************************
3076  *									*
3077  *	Routines to parse Name, NCName and NmToken			*
3078  *									*
3079  ************************************************************************/
/*
 * Debug-only call counters: each name parsing routine of this section
 * increments its own counter on entry when DEBUG is defined.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3088 
3089 /*
3090  * The two following functions are related to the change of accepted
3091  * characters for Name and NmToken in the Revision 5 of XML-1.0
3092  * They correspond to the modified production [4] and the new production [4a]
3093  * changes in that revision. Also note that the macros used for the
3094  * productions Letter, Digit, CombiningChar and Extender are not needed
3095  * anymore.
3096  * We still keep compatibility to pre-revision5 parsing semantic if the
3097  * new XML_PARSE_OLD10 option is given to the parser.
3098  */
3099 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3100 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3101     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3102         /*
3103 	 * Use the new checks of production [4] [4a] amd [5] of the
3104 	 * Update 5 of XML-1.0
3105 	 */
3106 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3107 	    (((c >= 'a') && (c <= 'z')) ||
3108 	     ((c >= 'A') && (c <= 'Z')) ||
3109 	     (c == '_') || (c == ':') ||
3110 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3111 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3112 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3113 	     ((c >= 0x370) && (c <= 0x37D)) ||
3114 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3115 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3116 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3117 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3118 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3119 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3120 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3121 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3122 	    return(1);
3123     } else {
3124         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3125 	    return(1);
3126     }
3127     return(0);
3128 }
3129 
3130 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3131 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3132     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3133         /*
3134 	 * Use the new checks of production [4] [4a] amd [5] of the
3135 	 * Update 5 of XML-1.0
3136 	 */
3137 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3138 	    (((c >= 'a') && (c <= 'z')) ||
3139 	     ((c >= 'A') && (c <= 'Z')) ||
3140 	     ((c >= '0') && (c <= '9')) || /* !start */
3141 	     (c == '_') || (c == ':') ||
3142 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3143 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3144 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3145 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3146 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3147 	     ((c >= 0x370) && (c <= 0x37D)) ||
3148 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3149 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3150 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3151 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3152 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3153 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3154 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3155 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3156 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3157 	     return(1);
3158     } else {
3159         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3160             (c == '.') || (c == '-') ||
3161 	    (c == '_') || (c == ':') ||
3162 	    (IS_COMBINING(c)) ||
3163 	    (IS_EXTENDER(c)))
3164 	    return(1);
3165     }
3166     return(0);
3167 }
3168 
3169 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3170                                           int *len, int *alloc, int normalize);
3171 
3172 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3173 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3174     int len = 0, l;
3175     int c;
3176     int count = 0;
3177 
3178 #ifdef DEBUG
3179     nbParseNameComplex++;
3180 #endif
3181 
3182     /*
3183      * Handler for more complex cases
3184      */
3185     GROW;
3186     if (ctxt->instate == XML_PARSER_EOF)
3187         return(NULL);
3188     c = CUR_CHAR(l);
3189     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3190         /*
3191 	 * Use the new checks of production [4] [4a] amd [5] of the
3192 	 * Update 5 of XML-1.0
3193 	 */
3194 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3195 	    (!(((c >= 'a') && (c <= 'z')) ||
3196 	       ((c >= 'A') && (c <= 'Z')) ||
3197 	       (c == '_') || (c == ':') ||
3198 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3199 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3200 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3201 	       ((c >= 0x370) && (c <= 0x37D)) ||
3202 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3204 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3205 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3206 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3207 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3208 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3209 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3210 	    return(NULL);
3211 	}
3212 	len += l;
3213 	NEXTL(l);
3214 	c = CUR_CHAR(l);
3215 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3216 	       (((c >= 'a') && (c <= 'z')) ||
3217 	        ((c >= 'A') && (c <= 'Z')) ||
3218 	        ((c >= '0') && (c <= '9')) || /* !start */
3219 	        (c == '_') || (c == ':') ||
3220 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3221 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3222 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3223 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3224 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3225 	        ((c >= 0x370) && (c <= 0x37D)) ||
3226 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3227 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3228 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3229 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3230 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3231 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3232 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3233 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3234 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3235 		)) {
3236 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3237 		count = 0;
3238 		GROW;
3239                 if (ctxt->instate == XML_PARSER_EOF)
3240                     return(NULL);
3241 	    }
3242 	    len += l;
3243 	    NEXTL(l);
3244 	    c = CUR_CHAR(l);
3245 	}
3246     } else {
3247 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248 	    (!IS_LETTER(c) && (c != '_') &&
3249 	     (c != ':'))) {
3250 	    return(NULL);
3251 	}
3252 	len += l;
3253 	NEXTL(l);
3254 	c = CUR_CHAR(l);
3255 
3256 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3257 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3258 		(c == '.') || (c == '-') ||
3259 		(c == '_') || (c == ':') ||
3260 		(IS_COMBINING(c)) ||
3261 		(IS_EXTENDER(c)))) {
3262 	    if (count++ > XML_PARSER_CHUNK_SIZE) {
3263 		count = 0;
3264 		GROW;
3265                 if (ctxt->instate == XML_PARSER_EOF)
3266                     return(NULL);
3267 	    }
3268 	    len += l;
3269 	    NEXTL(l);
3270 	    c = CUR_CHAR(l);
3271 	}
3272     }
3273     if ((len > XML_MAX_NAME_LENGTH) &&
3274         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3275         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3276         return(NULL);
3277     }
3278     if (ctxt->input->cur - ctxt->input->base < len) {
3279         /*
3280          * There were a couple of bugs where PERefs lead to to a change
3281          * of the buffer. Check the buffer size to avoid passing an invalid
3282          * pointer to xmlDictLookup.
3283          */
3284         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3285                     "unexpected change of input buffer");
3286         return (NULL);
3287     }
3288     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3289         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3290     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3291 }
3292 
3293 /**
3294  * xmlParseName:
3295  * @ctxt:  an XML parser context
3296  *
3297  * parse an XML name.
3298  *
3299  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3300  *                  CombiningChar | Extender
3301  *
3302  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3303  *
3304  * [6] Names ::= Name (#x20 Name)*
3305  *
3306  * Returns the Name parsed or NULL
3307  */
3308 
3309 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3310 xmlParseName(xmlParserCtxtPtr ctxt) {
3311     const xmlChar *in;
3312     const xmlChar *ret;
3313     int count = 0;
3314 
3315     GROW;
3316 
3317 #ifdef DEBUG
3318     nbParseName++;
3319 #endif
3320 
3321     /*
3322      * Accelerator for simple ASCII names
3323      */
3324     in = ctxt->input->cur;
3325     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3326 	((*in >= 0x41) && (*in <= 0x5A)) ||
3327 	(*in == '_') || (*in == ':')) {
3328 	in++;
3329 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3330 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3331 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3332 	       (*in == '_') || (*in == '-') ||
3333 	       (*in == ':') || (*in == '.'))
3334 	    in++;
3335 	if ((*in > 0) && (*in < 0x80)) {
3336 	    count = in - ctxt->input->cur;
3337             if ((count > XML_MAX_NAME_LENGTH) &&
3338                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3339                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3340                 return(NULL);
3341             }
3342 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3343 	    ctxt->input->cur = in;
3344 	    ctxt->nbChars += count;
3345 	    ctxt->input->col += count;
3346 	    if (ret == NULL)
3347 	        xmlErrMemory(ctxt, NULL);
3348 	    return(ret);
3349 	}
3350     }
3351     /* accelerator for special cases */
3352     return(xmlParseNameComplex(ctxt));
3353 }
3354 
3355 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3356 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3357     int len = 0, l;
3358     int c;
3359     int count = 0;
3360     size_t startPosition = 0;
3361 
3362 #ifdef DEBUG
3363     nbParseNCNameComplex++;
3364 #endif
3365 
3366     /*
3367      * Handler for more complex cases
3368      */
3369     GROW;
3370     startPosition = CUR_PTR - BASE_PTR;
3371     c = CUR_CHAR(l);
3372     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3373 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3374 	return(NULL);
3375     }
3376 
3377     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3378 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3379 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3380             if ((len > XML_MAX_NAME_LENGTH) &&
3381                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3382                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3383                 return(NULL);
3384             }
3385 	    count = 0;
3386 	    GROW;
3387             if (ctxt->instate == XML_PARSER_EOF)
3388                 return(NULL);
3389 	}
3390 	len += l;
3391 	NEXTL(l);
3392 	c = CUR_CHAR(l);
3393 	if (c == 0) {
3394 	    count = 0;
3395 	    /*
3396 	     * when shrinking to extend the buffer we really need to preserve
3397 	     * the part of the name we already parsed. Hence rolling back
3398 	     * by current lenght.
3399 	     */
3400 	    ctxt->input->cur -= l;
3401 	    GROW;
3402             if (ctxt->instate == XML_PARSER_EOF)
3403                 return(NULL);
3404 	    ctxt->input->cur += l;
3405 	    c = CUR_CHAR(l);
3406 	}
3407     }
3408     if ((len > XML_MAX_NAME_LENGTH) &&
3409         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3410         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3411         return(NULL);
3412     }
3413     return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3414 }
3415 
3416 /**
3417  * xmlParseNCName:
3418  * @ctxt:  an XML parser context
3419  * @len:  length of the string parsed
3420  *
3421  * parse an XML name.
3422  *
3423  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3424  *                      CombiningChar | Extender
3425  *
3426  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3427  *
3428  * Returns the Name parsed or NULL
3429  */
3430 
3431 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3432 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3433     const xmlChar *in, *e;
3434     const xmlChar *ret;
3435     int count = 0;
3436 
3437 #ifdef DEBUG
3438     nbParseNCName++;
3439 #endif
3440 
3441     /*
3442      * Accelerator for simple ASCII names
3443      */
3444     in = ctxt->input->cur;
3445     e = ctxt->input->end;
3446     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3447 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3448 	 (*in == '_')) && (in < e)) {
3449 	in++;
3450 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3451 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3452 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3453 	        (*in == '_') || (*in == '-') ||
3454 	        (*in == '.')) && (in < e))
3455 	    in++;
3456 	if (in >= e)
3457 	    goto complex;
3458 	if ((*in > 0) && (*in < 0x80)) {
3459 	    count = in - ctxt->input->cur;
3460             if ((count > XML_MAX_NAME_LENGTH) &&
3461                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3462                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3463                 return(NULL);
3464             }
3465 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3466 	    ctxt->input->cur = in;
3467 	    ctxt->nbChars += count;
3468 	    ctxt->input->col += count;
3469 	    if (ret == NULL) {
3470 	        xmlErrMemory(ctxt, NULL);
3471 	    }
3472 	    return(ret);
3473 	}
3474     }
3475 complex:
3476     return(xmlParseNCNameComplex(ctxt));
3477 }
3478 
3479 /**
3480  * xmlParseNameAndCompare:
3481  * @ctxt:  an XML parser context
3482  *
3483  * parse an XML name and compares for match
3484  * (specialized for endtag parsing)
3485  *
3486  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3487  * and the name for mismatch
3488  */
3489 
3490 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3491 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3492     register const xmlChar *cmp = other;
3493     register const xmlChar *in;
3494     const xmlChar *ret;
3495 
3496     GROW;
3497     if (ctxt->instate == XML_PARSER_EOF)
3498         return(NULL);
3499 
3500     in = ctxt->input->cur;
3501     while (*in != 0 && *in == *cmp) {
3502 	++in;
3503 	++cmp;
3504 	ctxt->input->col++;
3505     }
3506     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3507 	/* success */
3508 	ctxt->input->cur = in;
3509 	return (const xmlChar*) 1;
3510     }
3511     /* failure (or end of input buffer), check with full function */
3512     ret = xmlParseName (ctxt);
3513     /* strings coming from the dictionary direct compare possible */
3514     if (ret == other) {
3515 	return (const xmlChar*) 1;
3516     }
3517     return ret;
3518 }
3519 
3520 /**
3521  * xmlParseStringName:
3522  * @ctxt:  an XML parser context
3523  * @str:  a pointer to the string pointer (IN/OUT)
3524  *
3525  * parse an XML name.
3526  *
3527  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3528  *                  CombiningChar | Extender
3529  *
3530  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3531  *
3532  * [6] Names ::= Name (#x20 Name)*
3533  *
3534  * Returns the Name parsed or NULL. The @str pointer
3535  * is updated to the current location in the string.
3536  */
3537 
3538 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3539 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3540     xmlChar buf[XML_MAX_NAMELEN + 5];
3541     const xmlChar *cur = *str;
3542     int len = 0, l;
3543     int c;
3544 
3545 #ifdef DEBUG
3546     nbParseStringName++;
3547 #endif
3548 
3549     c = CUR_SCHAR(cur, l);
3550     if (!xmlIsNameStartChar(ctxt, c)) {
3551 	return(NULL);
3552     }
3553 
3554     COPY_BUF(l,buf,len,c);
3555     cur += l;
3556     c = CUR_SCHAR(cur, l);
3557     while (xmlIsNameChar(ctxt, c)) {
3558 	COPY_BUF(l,buf,len,c);
3559 	cur += l;
3560 	c = CUR_SCHAR(cur, l);
3561 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3562 	    /*
3563 	     * Okay someone managed to make a huge name, so he's ready to pay
3564 	     * for the processing speed.
3565 	     */
3566 	    xmlChar *buffer;
3567 	    int max = len * 2;
3568 
3569 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3570 	    if (buffer == NULL) {
3571 	        xmlErrMemory(ctxt, NULL);
3572 		return(NULL);
3573 	    }
3574 	    memcpy(buffer, buf, len);
3575 	    while (xmlIsNameChar(ctxt, c)) {
3576 		if (len + 10 > max) {
3577 		    xmlChar *tmp;
3578 
3579                     if ((len > XML_MAX_NAME_LENGTH) &&
3580                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3581                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3582 			xmlFree(buffer);
3583                         return(NULL);
3584                     }
3585 		    max *= 2;
3586 		    tmp = (xmlChar *) xmlRealloc(buffer,
3587 			                            max * sizeof(xmlChar));
3588 		    if (tmp == NULL) {
3589 			xmlErrMemory(ctxt, NULL);
3590 			xmlFree(buffer);
3591 			return(NULL);
3592 		    }
3593 		    buffer = tmp;
3594 		}
3595 		COPY_BUF(l,buffer,len,c);
3596 		cur += l;
3597 		c = CUR_SCHAR(cur, l);
3598 	    }
3599 	    buffer[len] = 0;
3600 	    *str = cur;
3601 	    return(buffer);
3602 	}
3603     }
3604     if ((len > XML_MAX_NAME_LENGTH) &&
3605         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3606         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3607         return(NULL);
3608     }
3609     *str = cur;
3610     return(xmlStrndup(buf, len));
3611 }
3612 
3613 /**
3614  * xmlParseNmtoken:
3615  * @ctxt:  an XML parser context
3616  *
3617  * parse an XML Nmtoken.
3618  *
3619  * [7] Nmtoken ::= (NameChar)+
3620  *
3621  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3622  *
3623  * Returns the Nmtoken parsed or NULL
3624  */
3625 
3626 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3627 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3628     xmlChar buf[XML_MAX_NAMELEN + 5];
3629     int len = 0, l;
3630     int c;
3631     int count = 0;
3632 
3633 #ifdef DEBUG
3634     nbParseNmToken++;
3635 #endif
3636 
3637     GROW;
3638     if (ctxt->instate == XML_PARSER_EOF)
3639         return(NULL);
3640     c = CUR_CHAR(l);
3641 
3642     while (xmlIsNameChar(ctxt, c)) {
3643 	if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 	    count = 0;
3645 	    GROW;
3646 	}
3647 	COPY_BUF(l,buf,len,c);
3648 	NEXTL(l);
3649 	c = CUR_CHAR(l);
3650 	if (c == 0) {
3651 	    count = 0;
3652 	    GROW;
3653 	    if (ctxt->instate == XML_PARSER_EOF)
3654 		return(NULL);
3655             c = CUR_CHAR(l);
3656 	}
3657 	if (len >= XML_MAX_NAMELEN) {
3658 	    /*
3659 	     * Okay someone managed to make a huge token, so he's ready to pay
3660 	     * for the processing speed.
3661 	     */
3662 	    xmlChar *buffer;
3663 	    int max = len * 2;
3664 
3665 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3666 	    if (buffer == NULL) {
3667 	        xmlErrMemory(ctxt, NULL);
3668 		return(NULL);
3669 	    }
3670 	    memcpy(buffer, buf, len);
3671 	    while (xmlIsNameChar(ctxt, c)) {
3672 		if (count++ > XML_PARSER_CHUNK_SIZE) {
3673 		    count = 0;
3674 		    GROW;
3675                     if (ctxt->instate == XML_PARSER_EOF) {
3676                         xmlFree(buffer);
3677                         return(NULL);
3678                     }
3679 		}
3680 		if (len + 10 > max) {
3681 		    xmlChar *tmp;
3682 
3683                     if ((max > XML_MAX_NAME_LENGTH) &&
3684                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3685                         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3686                         xmlFree(buffer);
3687                         return(NULL);
3688                     }
3689 		    max *= 2;
3690 		    tmp = (xmlChar *) xmlRealloc(buffer,
3691 			                            max * sizeof(xmlChar));
3692 		    if (tmp == NULL) {
3693 			xmlErrMemory(ctxt, NULL);
3694 			xmlFree(buffer);
3695 			return(NULL);
3696 		    }
3697 		    buffer = tmp;
3698 		}
3699 		COPY_BUF(l,buffer,len,c);
3700 		NEXTL(l);
3701 		c = CUR_CHAR(l);
3702 	    }
3703 	    buffer[len] = 0;
3704 	    return(buffer);
3705 	}
3706     }
3707     if (len == 0)
3708         return(NULL);
3709     if ((len > XML_MAX_NAME_LENGTH) &&
3710         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3711         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3712         return(NULL);
3713     }
3714     return(xmlStrndup(buf, len));
3715 }
3716 
3717 /**
3718  * xmlParseEntityValue:
3719  * @ctxt:  an XML parser context
3720  * @orig:  if non-NULL store a copy of the original entity value
3721  *
3722  * parse a value for ENTITY declarations
3723  *
3724  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3725  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3726  *
3727  * Returns the EntityValue parsed with reference substituted or NULL
3728  */
3729 
3730 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3731 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3732     xmlChar *buf = NULL;
3733     int len = 0;
3734     int size = XML_PARSER_BUFFER_SIZE;
3735     int c, l;
3736     xmlChar stop;
3737     xmlChar *ret = NULL;
3738     const xmlChar *cur = NULL;
3739     xmlParserInputPtr input;
3740 
3741     if (RAW == '"') stop = '"';
3742     else if (RAW == '\'') stop = '\'';
3743     else {
3744 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3745 	return(NULL);
3746     }
3747     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3748     if (buf == NULL) {
3749 	xmlErrMemory(ctxt, NULL);
3750 	return(NULL);
3751     }
3752 
3753     /*
3754      * The content of the entity definition is copied in a buffer.
3755      */
3756 
3757     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3758     input = ctxt->input;
3759     GROW;
3760     if (ctxt->instate == XML_PARSER_EOF)
3761         goto error;
3762     NEXT;
3763     c = CUR_CHAR(l);
3764     /*
3765      * NOTE: 4.4.5 Included in Literal
3766      * When a parameter entity reference appears in a literal entity
3767      * value, ... a single or double quote character in the replacement
3768      * text is always treated as a normal data character and will not
3769      * terminate the literal.
3770      * In practice it means we stop the loop only when back at parsing
3771      * the initial entity and the quote is found
3772      */
3773     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3774 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3775 	if (len + 5 >= size) {
3776 	    xmlChar *tmp;
3777 
3778 	    size *= 2;
3779 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3780 	    if (tmp == NULL) {
3781 		xmlErrMemory(ctxt, NULL);
3782                 goto error;
3783 	    }
3784 	    buf = tmp;
3785 	}
3786 	COPY_BUF(l,buf,len,c);
3787 	NEXTL(l);
3788 
3789 	GROW;
3790 	c = CUR_CHAR(l);
3791 	if (c == 0) {
3792 	    GROW;
3793 	    c = CUR_CHAR(l);
3794 	}
3795     }
3796     buf[len] = 0;
3797     if (ctxt->instate == XML_PARSER_EOF)
3798         goto error;
3799     if (c != stop) {
3800         xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3801         goto error;
3802     }
3803     NEXT;
3804 
3805     /*
3806      * Raise problem w.r.t. '&' and '%' being used in non-entities
3807      * reference constructs. Note Charref will be handled in
3808      * xmlStringDecodeEntities()
3809      */
3810     cur = buf;
3811     while (*cur != 0) { /* non input consuming */
3812 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3813 	    xmlChar *name;
3814 	    xmlChar tmp = *cur;
3815             int nameOk = 0;
3816 
3817 	    cur++;
3818 	    name = xmlParseStringName(ctxt, &cur);
3819             if (name != NULL) {
3820                 nameOk = 1;
3821                 xmlFree(name);
3822             }
3823             if ((nameOk == 0) || (*cur != ';')) {
3824 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3825 	    "EntityValue: '%c' forbidden except for entities references\n",
3826 	                          tmp);
3827                 goto error;
3828 	    }
3829 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3830 		(ctxt->inputNr == 1)) {
3831 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3832                 goto error;
3833 	    }
3834 	    if (*cur == 0)
3835 	        break;
3836 	}
3837 	cur++;
3838     }
3839 
3840     /*
3841      * Then PEReference entities are substituted.
3842      *
3843      * NOTE: 4.4.7 Bypassed
3844      * When a general entity reference appears in the EntityValue in
3845      * an entity declaration, it is bypassed and left as is.
3846      * so XML_SUBSTITUTE_REF is not set here.
3847      */
3848     ++ctxt->depth;
3849     ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3850                                   0, 0, 0);
3851     --ctxt->depth;
3852     if (orig != NULL) {
3853         *orig = buf;
3854         buf = NULL;
3855     }
3856 
3857 error:
3858     if (buf != NULL)
3859         xmlFree(buf);
3860     return(ret);
3861 }
3862 
3863 /**
3864  * xmlParseAttValueComplex:
3865  * @ctxt:  an XML parser context
3866  * @len:   the resulting attribute len
3867  * @normalize:  wether to apply the inner normalization
3868  *
3869  * parse a value for an attribute, this is the fallback function
3870  * of xmlParseAttValue() when the attribute parsing requires handling
3871  * of non-ASCII characters, or normalization compaction.
3872  *
3873  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3874  */
3875 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3876 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3877     xmlChar limit = 0;
3878     xmlChar *buf = NULL;
3879     xmlChar *rep = NULL;
3880     size_t len = 0;
3881     size_t buf_size = 0;
3882     int c, l, in_space = 0;
3883     xmlChar *current = NULL;
3884     xmlEntityPtr ent;
3885 
3886     if (NXT(0) == '"') {
3887 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3888 	limit = '"';
3889         NEXT;
3890     } else if (NXT(0) == '\'') {
3891 	limit = '\'';
3892 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3893         NEXT;
3894     } else {
3895 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3896 	return(NULL);
3897     }
3898 
3899     /*
3900      * allocate a translation buffer.
3901      */
3902     buf_size = XML_PARSER_BUFFER_SIZE;
3903     buf = (xmlChar *) xmlMallocAtomic(buf_size);
3904     if (buf == NULL) goto mem_error;
3905 
3906     /*
3907      * OK loop until we reach one of the ending char or a size limit.
3908      */
3909     c = CUR_CHAR(l);
3910     while (((NXT(0) != limit) && /* checked */
3911             (IS_CHAR(c)) && (c != '<')) &&
3912             (ctxt->instate != XML_PARSER_EOF)) {
3913         /*
3914          * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3915          * special option is given
3916          */
3917         if ((len > XML_MAX_TEXT_LENGTH) &&
3918             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3919             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3920                            "AttValue length too long\n");
3921             goto mem_error;
3922         }
3923 	if (c == 0) break;
3924 	if (c == '&') {
3925 	    in_space = 0;
3926 	    if (NXT(1) == '#') {
3927 		int val = xmlParseCharRef(ctxt);
3928 
3929 		if (val == '&') {
3930 		    if (ctxt->replaceEntities) {
3931 			if (len + 10 > buf_size) {
3932 			    growBuffer(buf, 10);
3933 			}
3934 			buf[len++] = '&';
3935 		    } else {
3936 			/*
3937 			 * The reparsing will be done in xmlStringGetNodeList()
3938 			 * called by the attribute() function in SAX.c
3939 			 */
3940 			if (len + 10 > buf_size) {
3941 			    growBuffer(buf, 10);
3942 			}
3943 			buf[len++] = '&';
3944 			buf[len++] = '#';
3945 			buf[len++] = '3';
3946 			buf[len++] = '8';
3947 			buf[len++] = ';';
3948 		    }
3949 		} else if (val != 0) {
3950 		    if (len + 10 > buf_size) {
3951 			growBuffer(buf, 10);
3952 		    }
3953 		    len += xmlCopyChar(0, &buf[len], val);
3954 		}
3955 	    } else {
3956 		ent = xmlParseEntityRef(ctxt);
3957 		ctxt->nbentities++;
3958 		if (ent != NULL)
3959 		    ctxt->nbentities += ent->owner;
3960 		if ((ent != NULL) &&
3961 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3962 		    if (len + 10 > buf_size) {
3963 			growBuffer(buf, 10);
3964 		    }
3965 		    if ((ctxt->replaceEntities == 0) &&
3966 		        (ent->content[0] == '&')) {
3967 			buf[len++] = '&';
3968 			buf[len++] = '#';
3969 			buf[len++] = '3';
3970 			buf[len++] = '8';
3971 			buf[len++] = ';';
3972 		    } else {
3973 			buf[len++] = ent->content[0];
3974 		    }
3975 		} else if ((ent != NULL) &&
3976 		           (ctxt->replaceEntities != 0)) {
3977 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3978 			++ctxt->depth;
3979 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3980 						      XML_SUBSTITUTE_REF,
3981 						      0, 0, 0);
3982 			--ctxt->depth;
3983 			if (rep != NULL) {
3984 			    current = rep;
3985 			    while (*current != 0) { /* non input consuming */
3986                                 if ((*current == 0xD) || (*current == 0xA) ||
3987                                     (*current == 0x9)) {
3988                                     buf[len++] = 0x20;
3989                                     current++;
3990                                 } else
3991                                     buf[len++] = *current++;
3992 				if (len + 10 > buf_size) {
3993 				    growBuffer(buf, 10);
3994 				}
3995 			    }
3996 			    xmlFree(rep);
3997 			    rep = NULL;
3998 			}
3999 		    } else {
4000 			if (len + 10 > buf_size) {
4001 			    growBuffer(buf, 10);
4002 			}
4003 			if (ent->content != NULL)
4004 			    buf[len++] = ent->content[0];
4005 		    }
4006 		} else if (ent != NULL) {
4007 		    int i = xmlStrlen(ent->name);
4008 		    const xmlChar *cur = ent->name;
4009 
4010 		    /*
4011 		     * This may look absurd but is needed to detect
4012 		     * entities problems
4013 		     */
4014 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4015 			(ent->content != NULL) && (ent->checked == 0)) {
4016 			unsigned long oldnbent = ctxt->nbentities;
4017 
4018 			++ctxt->depth;
4019 			rep = xmlStringDecodeEntities(ctxt, ent->content,
4020 						  XML_SUBSTITUTE_REF, 0, 0, 0);
4021 			--ctxt->depth;
4022 
4023 			ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4024 			if (rep != NULL) {
4025 			    if (xmlStrchr(rep, '<'))
4026 			        ent->checked |= 1;
4027 			    xmlFree(rep);
4028 			    rep = NULL;
4029 			} else {
4030                             ent->content[0] = 0;
4031                         }
4032 		    }
4033 
4034 		    /*
4035 		     * Just output the reference
4036 		     */
4037 		    buf[len++] = '&';
4038 		    while (len + i + 10 > buf_size) {
4039 			growBuffer(buf, i + 10);
4040 		    }
4041 		    for (;i > 0;i--)
4042 			buf[len++] = *cur++;
4043 		    buf[len++] = ';';
4044 		}
4045 	    }
4046 	} else {
4047 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4048 	        if ((len != 0) || (!normalize)) {
4049 		    if ((!normalize) || (!in_space)) {
4050 			COPY_BUF(l,buf,len,0x20);
4051 			while (len + 10 > buf_size) {
4052 			    growBuffer(buf, 10);
4053 			}
4054 		    }
4055 		    in_space = 1;
4056 		}
4057 	    } else {
4058 	        in_space = 0;
4059 		COPY_BUF(l,buf,len,c);
4060 		if (len + 10 > buf_size) {
4061 		    growBuffer(buf, 10);
4062 		}
4063 	    }
4064 	    NEXTL(l);
4065 	}
4066 	GROW;
4067 	c = CUR_CHAR(l);
4068     }
4069     if (ctxt->instate == XML_PARSER_EOF)
4070         goto error;
4071 
4072     if ((in_space) && (normalize)) {
4073         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4074     }
4075     buf[len] = 0;
4076     if (RAW == '<') {
4077 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4078     } else if (RAW != limit) {
4079 	if ((c != 0) && (!IS_CHAR(c))) {
4080 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4081 			   "invalid character in attribute value\n");
4082 	} else {
4083 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4084 			   "AttValue: ' expected\n");
4085         }
4086     } else
4087 	NEXT;
4088 
4089     /*
4090      * There we potentially risk an overflow, don't allow attribute value of
4091      * length more than INT_MAX it is a very reasonnable assumption !
4092      */
4093     if (len >= INT_MAX) {
4094         xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4095                        "AttValue length too long\n");
4096         goto mem_error;
4097     }
4098 
4099     if (attlen != NULL) *attlen = (int) len;
4100     return(buf);
4101 
4102 mem_error:
4103     xmlErrMemory(ctxt, NULL);
4104 error:
4105     if (buf != NULL)
4106         xmlFree(buf);
4107     if (rep != NULL)
4108         xmlFree(rep);
4109     return(NULL);
4110 }
4111 
4112 /**
4113  * xmlParseAttValue:
4114  * @ctxt:  an XML parser context
4115  *
4116  * parse a value for an attribute
4117  * Note: the parser won't do substitution of entities here, this
4118  * will be handled later in xmlStringGetNodeList
4119  *
4120  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4121  *                   "'" ([^<&'] | Reference)* "'"
4122  *
4123  * 3.3.3 Attribute-Value Normalization:
4124  * Before the value of an attribute is passed to the application or
4125  * checked for validity, the XML processor must normalize it as follows:
4126  * - a character reference is processed by appending the referenced
4127  *   character to the attribute value
4128  * - an entity reference is processed by recursively processing the
4129  *   replacement text of the entity
4130  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4131  *   appending #x20 to the normalized value, except that only a single
4132  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4133  *   parsed entity or the literal entity value of an internal parsed entity
4134  * - other characters are processed by appending them to the normalized value
4135  * If the declared value is not CDATA, then the XML processor must further
4136  * process the normalized attribute value by discarding any leading and
4137  * trailing space (#x20) characters, and by replacing sequences of space
4138  * (#x20) characters by a single space (#x20) character.
4139  * All attributes for which no declaration has been read should be treated
4140  * by a non-validating parser as if declared CDATA.
4141  *
4142  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4143  */
4144 
4145 
4146 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4147 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4148     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4149     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4150 }
4151 
4152 /**
4153  * xmlParseSystemLiteral:
4154  * @ctxt:  an XML parser context
4155  *
4156  * parse an XML Literal
4157  *
4158  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4159  *
4160  * Returns the SystemLiteral parsed or NULL
4161  */
4162 
4163 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4164 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4165     xmlChar *buf = NULL;
4166     int len = 0;
4167     int size = XML_PARSER_BUFFER_SIZE;
4168     int cur, l;
4169     xmlChar stop;
4170     int state = ctxt->instate;
4171     int count = 0;
4172 
4173     SHRINK;
4174     if (RAW == '"') {
4175         NEXT;
4176 	stop = '"';
4177     } else if (RAW == '\'') {
4178         NEXT;
4179 	stop = '\'';
4180     } else {
4181 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4182 	return(NULL);
4183     }
4184 
4185     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4186     if (buf == NULL) {
4187         xmlErrMemory(ctxt, NULL);
4188 	return(NULL);
4189     }
4190     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4191     cur = CUR_CHAR(l);
4192     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4193 	if (len + 5 >= size) {
4194 	    xmlChar *tmp;
4195 
4196             if ((size > XML_MAX_NAME_LENGTH) &&
4197                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4198                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4199                 xmlFree(buf);
4200 		ctxt->instate = (xmlParserInputState) state;
4201                 return(NULL);
4202             }
4203 	    size *= 2;
4204 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4205 	    if (tmp == NULL) {
4206 	        xmlFree(buf);
4207 		xmlErrMemory(ctxt, NULL);
4208 		ctxt->instate = (xmlParserInputState) state;
4209 		return(NULL);
4210 	    }
4211 	    buf = tmp;
4212 	}
4213 	count++;
4214 	if (count > 50) {
4215 	    GROW;
4216 	    count = 0;
4217             if (ctxt->instate == XML_PARSER_EOF) {
4218 	        xmlFree(buf);
4219 		return(NULL);
4220             }
4221 	}
4222 	COPY_BUF(l,buf,len,cur);
4223 	NEXTL(l);
4224 	cur = CUR_CHAR(l);
4225 	if (cur == 0) {
4226 	    GROW;
4227 	    SHRINK;
4228 	    cur = CUR_CHAR(l);
4229 	}
4230     }
4231     buf[len] = 0;
4232     ctxt->instate = (xmlParserInputState) state;
4233     if (!IS_CHAR(cur)) {
4234 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4235     } else {
4236 	NEXT;
4237     }
4238     return(buf);
4239 }
4240 
4241 /**
4242  * xmlParsePubidLiteral:
4243  * @ctxt:  an XML parser context
4244  *
4245  * parse an XML public literal
4246  *
4247  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4248  *
4249  * Returns the PubidLiteral parsed or NULL.
4250  */
4251 
4252 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4253 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4254     xmlChar *buf = NULL;
4255     int len = 0;
4256     int size = XML_PARSER_BUFFER_SIZE;
4257     xmlChar cur;
4258     xmlChar stop;
4259     int count = 0;
4260     xmlParserInputState oldstate = ctxt->instate;
4261 
4262     SHRINK;
4263     if (RAW == '"') {
4264         NEXT;
4265 	stop = '"';
4266     } else if (RAW == '\'') {
4267         NEXT;
4268 	stop = '\'';
4269     } else {
4270 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4271 	return(NULL);
4272     }
4273     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4274     if (buf == NULL) {
4275 	xmlErrMemory(ctxt, NULL);
4276 	return(NULL);
4277     }
4278     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4279     cur = CUR;
4280     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4281 	if (len + 1 >= size) {
4282 	    xmlChar *tmp;
4283 
4284             if ((size > XML_MAX_NAME_LENGTH) &&
4285                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4286                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4287                 xmlFree(buf);
4288                 return(NULL);
4289             }
4290 	    size *= 2;
4291 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4292 	    if (tmp == NULL) {
4293 		xmlErrMemory(ctxt, NULL);
4294 		xmlFree(buf);
4295 		return(NULL);
4296 	    }
4297 	    buf = tmp;
4298 	}
4299 	buf[len++] = cur;
4300 	count++;
4301 	if (count > 50) {
4302 	    GROW;
4303 	    count = 0;
4304             if (ctxt->instate == XML_PARSER_EOF) {
4305 		xmlFree(buf);
4306 		return(NULL);
4307             }
4308 	}
4309 	NEXT;
4310 	cur = CUR;
4311 	if (cur == 0) {
4312 	    GROW;
4313 	    SHRINK;
4314 	    cur = CUR;
4315 	}
4316     }
4317     buf[len] = 0;
4318     if (cur != stop) {
4319 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320     } else {
4321 	NEXT;
4322     }
4323     ctxt->instate = oldstate;
4324     return(buf);
4325 }
4326 
4327 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4328 
/*
 * Lookup table driving the inner loop of the fast character data scanner.
 * An entry is non-zero (and equal to its own index) when the byte can be
 * skipped over directly: TAB and 0x20..0x7F, except '&', '<' and ']'.
 * Everything else (other control bytes including CR and LF, and all
 * non-ASCII bytes) is zero and stops the scan.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09); CR and LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4366 
4367 /**
4368  * xmlParseCharData:
4369  * @ctxt:  an XML parser context
4370  * @cdata:  int indicating whether we are within a CDATA section
4371  *
4372  * parse a CharData section.
4373  * if we are within a CDATA section ']]>' marks an end of section.
4374  *
4375  * The right angle bracket (>) may be represented using the string "&gt;",
4376  * and must, for compatibility, be escaped using "&gt;" or a character
4377  * reference when it appears in the string "]]>" in content, when that
4378  * string is not marking the end of a CDATA section.
4379  *
4380  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4381  */
4382 
4383 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4384 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4385     const xmlChar *in;
4386     int nbchar = 0;
4387     int line = ctxt->input->line;
4388     int col = ctxt->input->col;
4389     int ccol;
4390 
4391     SHRINK;
4392     GROW;
4393     /*
4394      * Accelerated common case where input don't need to be
4395      * modified before passing it to the handler.
4396      */
4397     if (!cdata) {
4398 	in = ctxt->input->cur;
4399 	do {
4400 get_more_space:
4401 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4402 	    if (*in == 0xA) {
4403 		do {
4404 		    ctxt->input->line++; ctxt->input->col = 1;
4405 		    in++;
4406 		} while (*in == 0xA);
4407 		goto get_more_space;
4408 	    }
4409 	    if (*in == '<') {
4410 		nbchar = in - ctxt->input->cur;
4411 		if (nbchar > 0) {
4412 		    const xmlChar *tmp = ctxt->input->cur;
4413 		    ctxt->input->cur = in;
4414 
4415 		    if ((ctxt->sax != NULL) &&
4416 		        (ctxt->sax->ignorableWhitespace !=
4417 		         ctxt->sax->characters)) {
4418 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4419 			    if (ctxt->sax->ignorableWhitespace != NULL)
4420 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4421 						       tmp, nbchar);
4422 			} else {
4423 			    if (ctxt->sax->characters != NULL)
4424 				ctxt->sax->characters(ctxt->userData,
4425 						      tmp, nbchar);
4426 			    if (*ctxt->space == -1)
4427 			        *ctxt->space = -2;
4428 			}
4429 		    } else if ((ctxt->sax != NULL) &&
4430 		               (ctxt->sax->characters != NULL)) {
4431 			ctxt->sax->characters(ctxt->userData,
4432 					      tmp, nbchar);
4433 		    }
4434 		}
4435 		return;
4436 	    }
4437 
4438 get_more:
4439             ccol = ctxt->input->col;
4440 	    while (test_char_data[*in]) {
4441 		in++;
4442 		ccol++;
4443 	    }
4444 	    ctxt->input->col = ccol;
4445 	    if (*in == 0xA) {
4446 		do {
4447 		    ctxt->input->line++; ctxt->input->col = 1;
4448 		    in++;
4449 		} while (*in == 0xA);
4450 		goto get_more;
4451 	    }
4452 	    if (*in == ']') {
4453 		if ((in[1] == ']') && (in[2] == '>')) {
4454 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4455 		    ctxt->input->cur = in + 1;
4456 		    return;
4457 		}
4458 		in++;
4459 		ctxt->input->col++;
4460 		goto get_more;
4461 	    }
4462 	    nbchar = in - ctxt->input->cur;
4463 	    if (nbchar > 0) {
4464 		if ((ctxt->sax != NULL) &&
4465 		    (ctxt->sax->ignorableWhitespace !=
4466 		     ctxt->sax->characters) &&
4467 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4468 		    const xmlChar *tmp = ctxt->input->cur;
4469 		    ctxt->input->cur = in;
4470 
4471 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4472 		        if (ctxt->sax->ignorableWhitespace != NULL)
4473 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4474 							   tmp, nbchar);
4475 		    } else {
4476 		        if (ctxt->sax->characters != NULL)
4477 			    ctxt->sax->characters(ctxt->userData,
4478 						  tmp, nbchar);
4479 			if (*ctxt->space == -1)
4480 			    *ctxt->space = -2;
4481 		    }
4482                     line = ctxt->input->line;
4483                     col = ctxt->input->col;
4484 		} else if (ctxt->sax != NULL) {
4485 		    if (ctxt->sax->characters != NULL)
4486 			ctxt->sax->characters(ctxt->userData,
4487 					      ctxt->input->cur, nbchar);
4488                     line = ctxt->input->line;
4489                     col = ctxt->input->col;
4490 		}
4491                 /* something really bad happened in the SAX callback */
4492                 if (ctxt->instate != XML_PARSER_CONTENT)
4493                     return;
4494 	    }
4495 	    ctxt->input->cur = in;
4496 	    if (*in == 0xD) {
4497 		in++;
4498 		if (*in == 0xA) {
4499 		    ctxt->input->cur = in;
4500 		    in++;
4501 		    ctxt->input->line++; ctxt->input->col = 1;
4502 		    continue; /* while */
4503 		}
4504 		in--;
4505 	    }
4506 	    if (*in == '<') {
4507 		return;
4508 	    }
4509 	    if (*in == '&') {
4510 		return;
4511 	    }
4512 	    SHRINK;
4513 	    GROW;
4514             if (ctxt->instate == XML_PARSER_EOF)
4515 		return;
4516 	    in = ctxt->input->cur;
4517 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4518 	nbchar = 0;
4519     }
4520     ctxt->input->line = line;
4521     ctxt->input->col = col;
4522     xmlParseCharDataComplex(ctxt, cdata);
4523 }
4524 
4525 /**
4526  * xmlParseCharDataComplex:
4527  * @ctxt:  an XML parser context
4528  * @cdata:  int indicating whether we are within a CDATA section
4529  *
4530  * parse a CharData section.this is the fallback function
4531  * of xmlParseCharData() when the parsing requires handling
4532  * of non-ASCII characters.
4533  */
4534 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4535 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4536     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4537     int nbchar = 0;
4538     int cur, l;
4539     int count = 0;
4540 
4541     SHRINK;
4542     GROW;
4543     cur = CUR_CHAR(l);
4544     while ((cur != '<') && /* checked */
4545            (cur != '&') &&
4546 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4547 	if ((cur == ']') && (NXT(1) == ']') &&
4548 	    (NXT(2) == '>')) {
4549 	    if (cdata) break;
4550 	    else {
4551 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4552 	    }
4553 	}
4554 	COPY_BUF(l,buf,nbchar,cur);
4555 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4556 	    buf[nbchar] = 0;
4557 
4558 	    /*
4559 	     * OK the segment is to be consumed as chars.
4560 	     */
4561 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4562 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4563 		    if (ctxt->sax->ignorableWhitespace != NULL)
4564 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4565 			                               buf, nbchar);
4566 		} else {
4567 		    if (ctxt->sax->characters != NULL)
4568 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4569 		    if ((ctxt->sax->characters !=
4570 		         ctxt->sax->ignorableWhitespace) &&
4571 			(*ctxt->space == -1))
4572 			*ctxt->space = -2;
4573 		}
4574 	    }
4575 	    nbchar = 0;
4576             /* something really bad happened in the SAX callback */
4577             if (ctxt->instate != XML_PARSER_CONTENT)
4578                 return;
4579 	}
4580 	count++;
4581 	if (count > 50) {
4582 	    GROW;
4583 	    count = 0;
4584             if (ctxt->instate == XML_PARSER_EOF)
4585 		return;
4586 	}
4587 	NEXTL(l);
4588 	cur = CUR_CHAR(l);
4589     }
4590     if (nbchar != 0) {
4591         buf[nbchar] = 0;
4592 	/*
4593 	 * OK the segment is to be consumed as chars.
4594 	 */
4595 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4596 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4597 		if (ctxt->sax->ignorableWhitespace != NULL)
4598 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4599 	    } else {
4600 		if (ctxt->sax->characters != NULL)
4601 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4602 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4603 		    (*ctxt->space == -1))
4604 		    *ctxt->space = -2;
4605 	    }
4606 	}
4607     }
4608     if ((cur != 0) && (!IS_CHAR(cur))) {
4609 	/* Generate the error and skip the offending character */
4610         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4611                           "PCDATA invalid Char value %d\n",
4612 	                  cur);
4613 	NEXTL(l);
4614     }
4615 }
4616 
4617 /**
4618  * xmlParseExternalID:
4619  * @ctxt:  an XML parser context
4620  * @publicID:  a xmlChar** receiving PubidLiteral
4621  * @strict: indicate whether we should restrict parsing to only
4622  *          production [75], see NOTE below
4623  *
4624  * Parse an External ID or a Public ID
4625  *
4626  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4627  *       'PUBLIC' S PubidLiteral S SystemLiteral
4628  *
4629  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4630  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4631  *
4632  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4633  *
4634  * Returns the function returns SystemLiteral and in the second
4635  *                case publicID receives PubidLiteral, is strict is off
4636  *                it is possible to return NULL and have publicID set.
4637  */
4638 
4639 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4640 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4641     xmlChar *URI = NULL;
4642 
4643     SHRINK;
4644 
4645     *publicID = NULL;
4646     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4647         SKIP(6);
4648 	if (SKIP_BLANKS == 0) {
4649 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4650 	                   "Space required after 'SYSTEM'\n");
4651 	}
4652 	URI = xmlParseSystemLiteral(ctxt);
4653 	if (URI == NULL) {
4654 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4655         }
4656     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4657         SKIP(6);
4658 	if (SKIP_BLANKS == 0) {
4659 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4660 		    "Space required after 'PUBLIC'\n");
4661 	}
4662 	*publicID = xmlParsePubidLiteral(ctxt);
4663 	if (*publicID == NULL) {
4664 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4665 	}
4666 	if (strict) {
4667 	    /*
4668 	     * We don't handle [83] so "S SystemLiteral" is required.
4669 	     */
4670 	    if (SKIP_BLANKS == 0) {
4671 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4672 			"Space required after the Public Identifier\n");
4673 	    }
4674 	} else {
4675 	    /*
4676 	     * We handle [83] so we return immediately, if
4677 	     * "S SystemLiteral" is not detected. We skip blanks if no
4678              * system literal was found, but this is harmless since we must
4679              * be at the end of a NotationDecl.
4680 	     */
4681 	    if (SKIP_BLANKS == 0) return(NULL);
4682 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
4683 	}
4684 	URI = xmlParseSystemLiteral(ctxt);
4685 	if (URI == NULL) {
4686 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4687         }
4688     }
4689     return(URI);
4690 }
4691 
4692 /**
4693  * xmlParseCommentComplex:
4694  * @ctxt:  an XML parser context
4695  * @buf:  the already parsed part of the buffer
4696  * @len:  number of bytes filles in the buffer
4697  * @size:  allocated size of the buffer
4698  *
4699  * Skip an XML (SGML) comment <!-- .... -->
4700  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4701  *  must not occur within comments. "
4702  * This is the slow routine in case the accelerator for ascii didn't work
4703  *
4704  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4705  */
4706 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4707 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4708                        size_t len, size_t size) {
4709     int q, ql;
4710     int r, rl;
4711     int cur, l;
4712     size_t count = 0;
4713     int inputid;
4714 
4715     inputid = ctxt->input->id;
4716 
4717     if (buf == NULL) {
4718         len = 0;
4719 	size = XML_PARSER_BUFFER_SIZE;
4720 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4721 	if (buf == NULL) {
4722 	    xmlErrMemory(ctxt, NULL);
4723 	    return;
4724 	}
4725     }
4726     GROW;	/* Assure there's enough input data */
4727     q = CUR_CHAR(ql);
4728     if (q == 0)
4729         goto not_terminated;
4730     if (!IS_CHAR(q)) {
4731         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4732                           "xmlParseComment: invalid xmlChar value %d\n",
4733 	                  q);
4734 	xmlFree (buf);
4735 	return;
4736     }
4737     NEXTL(ql);
4738     r = CUR_CHAR(rl);
4739     if (r == 0)
4740         goto not_terminated;
4741     if (!IS_CHAR(r)) {
4742         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4743                           "xmlParseComment: invalid xmlChar value %d\n",
4744 	                  q);
4745 	xmlFree (buf);
4746 	return;
4747     }
4748     NEXTL(rl);
4749     cur = CUR_CHAR(l);
4750     if (cur == 0)
4751         goto not_terminated;
4752     while (IS_CHAR(cur) && /* checked */
4753            ((cur != '>') ||
4754 	    (r != '-') || (q != '-'))) {
4755 	if ((r == '-') && (q == '-')) {
4756 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4757 	}
4758         if ((len > XML_MAX_TEXT_LENGTH) &&
4759             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4760             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4761                          "Comment too big found", NULL);
4762             xmlFree (buf);
4763             return;
4764         }
4765 	if (len + 5 >= size) {
4766 	    xmlChar *new_buf;
4767             size_t new_size;
4768 
4769 	    new_size = size * 2;
4770 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4771 	    if (new_buf == NULL) {
4772 		xmlFree (buf);
4773 		xmlErrMemory(ctxt, NULL);
4774 		return;
4775 	    }
4776 	    buf = new_buf;
4777             size = new_size;
4778 	}
4779 	COPY_BUF(ql,buf,len,q);
4780 	q = r;
4781 	ql = rl;
4782 	r = cur;
4783 	rl = l;
4784 
4785 	count++;
4786 	if (count > 50) {
4787 	    GROW;
4788 	    count = 0;
4789             if (ctxt->instate == XML_PARSER_EOF) {
4790 		xmlFree(buf);
4791 		return;
4792             }
4793 	}
4794 	NEXTL(l);
4795 	cur = CUR_CHAR(l);
4796 	if (cur == 0) {
4797 	    SHRINK;
4798 	    GROW;
4799 	    cur = CUR_CHAR(l);
4800 	}
4801     }
4802     buf[len] = 0;
4803     if (cur == 0) {
4804 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4805 	                     "Comment not terminated \n<!--%.50s\n", buf);
4806     } else if (!IS_CHAR(cur)) {
4807         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4808                           "xmlParseComment: invalid xmlChar value %d\n",
4809 	                  cur);
4810     } else {
4811 	if (inputid != ctxt->input->id) {
4812 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4813 		           "Comment doesn't start and stop in the same"
4814                            " entity\n");
4815 	}
4816         NEXT;
4817 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4818 	    (!ctxt->disableSAX))
4819 	    ctxt->sax->comment(ctxt->userData, buf);
4820     }
4821     xmlFree(buf);
4822     return;
4823 not_terminated:
4824     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4825 			 "Comment not terminated\n", NULL);
4826     xmlFree(buf);
4827     return;
4828 }
4829 
4830 /**
4831  * xmlParseComment:
4832  * @ctxt:  an XML parser context
4833  *
4834  * Skip an XML (SGML) comment <!-- .... -->
4835  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4836  *  must not occur within comments. "
4837  *
4838  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4839  */
4840 void
xmlParseComment(xmlParserCtxtPtr ctxt)4841 xmlParseComment(xmlParserCtxtPtr ctxt) {
4842     xmlChar *buf = NULL;
4843     size_t size = XML_PARSER_BUFFER_SIZE;
4844     size_t len = 0;
4845     xmlParserInputState state;
4846     const xmlChar *in;
4847     size_t nbchar = 0;
4848     int ccol;
4849     int inputid;
4850 
4851     /*
4852      * Check that there is a comment right here.
4853      */
4854     if ((RAW != '<') || (NXT(1) != '!') ||
4855         (NXT(2) != '-') || (NXT(3) != '-')) return;
4856     state = ctxt->instate;
4857     ctxt->instate = XML_PARSER_COMMENT;
4858     inputid = ctxt->input->id;
4859     SKIP(4);
4860     SHRINK;
4861     GROW;
4862 
4863     /*
4864      * Accelerated common case where input don't need to be
4865      * modified before passing it to the handler.
4866      */
4867     in = ctxt->input->cur;
4868     do {
4869 	if (*in == 0xA) {
4870 	    do {
4871 		ctxt->input->line++; ctxt->input->col = 1;
4872 		in++;
4873 	    } while (*in == 0xA);
4874 	}
4875 get_more:
4876         ccol = ctxt->input->col;
4877 	while (((*in > '-') && (*in <= 0x7F)) ||
4878 	       ((*in >= 0x20) && (*in < '-')) ||
4879 	       (*in == 0x09)) {
4880 		    in++;
4881 		    ccol++;
4882 	}
4883 	ctxt->input->col = ccol;
4884 	if (*in == 0xA) {
4885 	    do {
4886 		ctxt->input->line++; ctxt->input->col = 1;
4887 		in++;
4888 	    } while (*in == 0xA);
4889 	    goto get_more;
4890 	}
4891 	nbchar = in - ctxt->input->cur;
4892 	/*
4893 	 * save current set of data
4894 	 */
4895 	if (nbchar > 0) {
4896 	    if ((ctxt->sax != NULL) &&
4897 		(ctxt->sax->comment != NULL)) {
4898 		if (buf == NULL) {
4899 		    if ((*in == '-') && (in[1] == '-'))
4900 		        size = nbchar + 1;
4901 		    else
4902 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4903 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4904 		    if (buf == NULL) {
4905 		        xmlErrMemory(ctxt, NULL);
4906 			ctxt->instate = state;
4907 			return;
4908 		    }
4909 		    len = 0;
4910 		} else if (len + nbchar + 1 >= size) {
4911 		    xmlChar *new_buf;
4912 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4913 		    new_buf = (xmlChar *) xmlRealloc(buf,
4914 		                                     size * sizeof(xmlChar));
4915 		    if (new_buf == NULL) {
4916 		        xmlFree (buf);
4917 			xmlErrMemory(ctxt, NULL);
4918 			ctxt->instate = state;
4919 			return;
4920 		    }
4921 		    buf = new_buf;
4922 		}
4923 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4924 		len += nbchar;
4925 		buf[len] = 0;
4926 	    }
4927 	}
4928         if ((len > XML_MAX_TEXT_LENGTH) &&
4929             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4930             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4931                          "Comment too big found", NULL);
4932             xmlFree (buf);
4933             return;
4934         }
4935 	ctxt->input->cur = in;
4936 	if (*in == 0xA) {
4937 	    in++;
4938 	    ctxt->input->line++; ctxt->input->col = 1;
4939 	}
4940 	if (*in == 0xD) {
4941 	    in++;
4942 	    if (*in == 0xA) {
4943 		ctxt->input->cur = in;
4944 		in++;
4945 		ctxt->input->line++; ctxt->input->col = 1;
4946 		continue; /* while */
4947 	    }
4948 	    in--;
4949 	}
4950 	SHRINK;
4951 	GROW;
4952         if (ctxt->instate == XML_PARSER_EOF) {
4953             xmlFree(buf);
4954             return;
4955         }
4956 	in = ctxt->input->cur;
4957 	if (*in == '-') {
4958 	    if (in[1] == '-') {
4959 	        if (in[2] == '>') {
4960 		    if (ctxt->input->id != inputid) {
4961 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4962 			               "comment doesn't start and stop in the"
4963                                        " same entity\n");
4964 		    }
4965 		    SKIP(3);
4966 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4967 		        (!ctxt->disableSAX)) {
4968 			if (buf != NULL)
4969 			    ctxt->sax->comment(ctxt->userData, buf);
4970 			else
4971 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4972 		    }
4973 		    if (buf != NULL)
4974 		        xmlFree(buf);
4975 		    if (ctxt->instate != XML_PARSER_EOF)
4976 			ctxt->instate = state;
4977 		    return;
4978 		}
4979 		if (buf != NULL) {
4980 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4981 		                      "Double hyphen within comment: "
4982                                       "<!--%.50s\n",
4983 				      buf);
4984 		} else
4985 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4986 		                      "Double hyphen within comment\n", NULL);
4987 		in++;
4988 		ctxt->input->col++;
4989 	    }
4990 	    in++;
4991 	    ctxt->input->col++;
4992 	    goto get_more;
4993 	}
4994     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4995     xmlParseCommentComplex(ctxt, buf, len, size);
4996     ctxt->instate = state;
4997     return;
4998 }
4999 
5000 
5001 /**
5002  * xmlParsePITarget:
5003  * @ctxt:  an XML parser context
5004  *
5005  * parse the name of a PI
5006  *
5007  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5008  *
5009  * Returns the PITarget name or NULL
5010  */
5011 
5012 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5013 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5014     const xmlChar *name;
5015 
5016     name = xmlParseName(ctxt);
5017     if ((name != NULL) &&
5018         ((name[0] == 'x') || (name[0] == 'X')) &&
5019         ((name[1] == 'm') || (name[1] == 'M')) &&
5020         ((name[2] == 'l') || (name[2] == 'L'))) {
5021 	int i;
5022 	if ((name[0] == 'x') && (name[1] == 'm') &&
5023 	    (name[2] == 'l') && (name[3] == 0)) {
5024 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5025 		 "XML declaration allowed only at the start of the document\n");
5026 	    return(name);
5027 	} else if (name[3] == 0) {
5028 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5029 	    return(name);
5030 	}
5031 	for (i = 0;;i++) {
5032 	    if (xmlW3CPIs[i] == NULL) break;
5033 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5034 	        return(name);
5035 	}
5036 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5037 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5038 		      NULL, NULL);
5039     }
5040     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5041 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5042 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5043     }
5044     return(name);
5045 }
5046 
5047 #ifdef LIBXML_CATALOG_ENABLED
5048 /**
5049  * xmlParseCatalogPI:
5050  * @ctxt:  an XML parser context
5051  * @catalog:  the PI value string
5052  *
5053  * parse an XML Catalog Processing Instruction.
5054  *
5055  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5056  *
5057  * Occurs only if allowed by the user and if happening in the Misc
5058  * part of the document before any doctype informations
5059  * This will add the given catalog to the parsing context in order
5060  * to be used if there is a resolution need further down in the document
5061  */
5062 
5063 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5064 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5065     xmlChar *URL = NULL;
5066     const xmlChar *tmp, *base;
5067     xmlChar marker;
5068 
5069     tmp = catalog;
5070     while (IS_BLANK_CH(*tmp)) tmp++;
5071     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5072 	goto error;
5073     tmp += 7;
5074     while (IS_BLANK_CH(*tmp)) tmp++;
5075     if (*tmp != '=') {
5076 	return;
5077     }
5078     tmp++;
5079     while (IS_BLANK_CH(*tmp)) tmp++;
5080     marker = *tmp;
5081     if ((marker != '\'') && (marker != '"'))
5082 	goto error;
5083     tmp++;
5084     base = tmp;
5085     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5086     if (*tmp == 0)
5087 	goto error;
5088     URL = xmlStrndup(base, tmp - base);
5089     tmp++;
5090     while (IS_BLANK_CH(*tmp)) tmp++;
5091     if (*tmp != 0)
5092 	goto error;
5093 
5094     if (URL != NULL) {
5095 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5096 	xmlFree(URL);
5097     }
5098     return;
5099 
5100 error:
5101     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5102 	          "Catalog PI syntax error: %s\n",
5103 		  catalog, NULL);
5104     if (URL != NULL)
5105 	xmlFree(URL);
5106 }
5107 #endif
5108 
5109 /**
5110  * xmlParsePI:
5111  * @ctxt:  an XML parser context
5112  *
5113  * parse an XML Processing Instruction.
5114  *
5115  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5116  *
5117  * The processing is transfered to SAX once parsed.
5118  */
5119 
5120 void
xmlParsePI(xmlParserCtxtPtr ctxt)5121 xmlParsePI(xmlParserCtxtPtr ctxt) {
5122     xmlChar *buf = NULL;
5123     size_t len = 0;
5124     size_t size = XML_PARSER_BUFFER_SIZE;
5125     int cur, l;
5126     const xmlChar *target;
5127     xmlParserInputState state;
5128     int count = 0;
5129 
5130     if ((RAW == '<') && (NXT(1) == '?')) {
5131 	int inputid = ctxt->input->id;
5132 	state = ctxt->instate;
5133         ctxt->instate = XML_PARSER_PI;
5134 	/*
5135 	 * this is a Processing Instruction.
5136 	 */
5137 	SKIP(2);
5138 	SHRINK;
5139 
5140 	/*
5141 	 * Parse the target name and check for special support like
5142 	 * namespace.
5143 	 */
5144         target = xmlParsePITarget(ctxt);
5145 	if (target != NULL) {
5146 	    if ((RAW == '?') && (NXT(1) == '>')) {
5147 		if (inputid != ctxt->input->id) {
5148 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5149 	                           "PI declaration doesn't start and stop in"
5150                                    " the same entity\n");
5151 		}
5152 		SKIP(2);
5153 
5154 		/*
5155 		 * SAX: PI detected.
5156 		 */
5157 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5158 		    (ctxt->sax->processingInstruction != NULL))
5159 		    ctxt->sax->processingInstruction(ctxt->userData,
5160 		                                     target, NULL);
5161 		if (ctxt->instate != XML_PARSER_EOF)
5162 		    ctxt->instate = state;
5163 		return;
5164 	    }
5165 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5166 	    if (buf == NULL) {
5167 		xmlErrMemory(ctxt, NULL);
5168 		ctxt->instate = state;
5169 		return;
5170 	    }
5171 	    if (SKIP_BLANKS == 0) {
5172 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5173 			  "ParsePI: PI %s space expected\n", target);
5174 	    }
5175 	    cur = CUR_CHAR(l);
5176 	    while (IS_CHAR(cur) && /* checked */
5177 		   ((cur != '?') || (NXT(1) != '>'))) {
5178 		if (len + 5 >= size) {
5179 		    xmlChar *tmp;
5180                     size_t new_size = size * 2;
5181 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5182 		    if (tmp == NULL) {
5183 			xmlErrMemory(ctxt, NULL);
5184 			xmlFree(buf);
5185 			ctxt->instate = state;
5186 			return;
5187 		    }
5188 		    buf = tmp;
5189                     size = new_size;
5190 		}
5191 		count++;
5192 		if (count > 50) {
5193 		    GROW;
5194                     if (ctxt->instate == XML_PARSER_EOF) {
5195                         xmlFree(buf);
5196                         return;
5197                     }
5198 		    count = 0;
5199                     if ((len > XML_MAX_TEXT_LENGTH) &&
5200                         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5201                         xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5202                                           "PI %s too big found", target);
5203                         xmlFree(buf);
5204                         ctxt->instate = state;
5205                         return;
5206                     }
5207 		}
5208 		COPY_BUF(l,buf,len,cur);
5209 		NEXTL(l);
5210 		cur = CUR_CHAR(l);
5211 		if (cur == 0) {
5212 		    SHRINK;
5213 		    GROW;
5214 		    cur = CUR_CHAR(l);
5215 		}
5216 	    }
5217             if ((len > XML_MAX_TEXT_LENGTH) &&
5218                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5219                 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5220                                   "PI %s too big found", target);
5221                 xmlFree(buf);
5222                 ctxt->instate = state;
5223                 return;
5224             }
5225 	    buf[len] = 0;
5226 	    if (cur != '?') {
5227 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5228 		      "ParsePI: PI %s never end ...\n", target);
5229 	    } else {
5230 		if (inputid != ctxt->input->id) {
5231 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5232 	                           "PI declaration doesn't start and stop in"
5233                                    " the same entity\n");
5234 		}
5235 		SKIP(2);
5236 
5237 #ifdef LIBXML_CATALOG_ENABLED
5238 		if (((state == XML_PARSER_MISC) ||
5239 	             (state == XML_PARSER_START)) &&
5240 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5241 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5242 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5243 			(allow == XML_CATA_ALLOW_ALL))
5244 			xmlParseCatalogPI(ctxt, buf);
5245 		}
5246 #endif
5247 
5248 
5249 		/*
5250 		 * SAX: PI detected.
5251 		 */
5252 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5253 		    (ctxt->sax->processingInstruction != NULL))
5254 		    ctxt->sax->processingInstruction(ctxt->userData,
5255 		                                     target, buf);
5256 	    }
5257 	    xmlFree(buf);
5258 	} else {
5259 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5260 	}
5261 	if (ctxt->instate != XML_PARSER_EOF)
5262 	    ctxt->instate = state;
5263     }
5264 }
5265 
5266 /**
5267  * xmlParseNotationDecl:
5268  * @ctxt:  an XML parser context
5269  *
5270  * parse a notation declaration
5271  *
5272  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5273  *
5274  * Hence there is actually 3 choices:
5275  *     'PUBLIC' S PubidLiteral
5276  *     'PUBLIC' S PubidLiteral S SystemLiteral
5277  * and 'SYSTEM' S SystemLiteral
5278  *
5279  * See the NOTE on xmlParseExternalID().
5280  */
5281 
5282 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5283 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5284     const xmlChar *name;
5285     xmlChar *Pubid;
5286     xmlChar *Systemid;
5287 
5288     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5289 	int inputid = ctxt->input->id;
5290 	SHRINK;
5291 	SKIP(10);
5292 	if (SKIP_BLANKS == 0) {
5293 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 			   "Space required after '<!NOTATION'\n");
5295 	    return;
5296 	}
5297 
5298         name = xmlParseName(ctxt);
5299 	if (name == NULL) {
5300 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5301 	    return;
5302 	}
5303 	if (xmlStrchr(name, ':') != NULL) {
5304 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5305 		     "colons are forbidden from notation names '%s'\n",
5306 		     name, NULL, NULL);
5307 	}
5308 	if (SKIP_BLANKS == 0) {
5309 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5310 		     "Space required after the NOTATION name'\n");
5311 	    return;
5312 	}
5313 
5314 	/*
5315 	 * Parse the IDs.
5316 	 */
5317 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5318 	SKIP_BLANKS;
5319 
5320 	if (RAW == '>') {
5321 	    if (inputid != ctxt->input->id) {
5322 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5323 	                       "Notation declaration doesn't start and stop"
5324                                " in the same entity\n");
5325 	    }
5326 	    NEXT;
5327 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5328 		(ctxt->sax->notationDecl != NULL))
5329 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5330 	} else {
5331 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5332 	}
5333 	if (Systemid != NULL) xmlFree(Systemid);
5334 	if (Pubid != NULL) xmlFree(Pubid);
5335     }
5336 }
5337 
5338 /**
5339  * xmlParseEntityDecl:
5340  * @ctxt:  an XML parser context
5341  *
5342  * parse <!ENTITY declarations
5343  *
5344  * [70] EntityDecl ::= GEDecl | PEDecl
5345  *
5346  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5347  *
5348  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5349  *
5350  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5351  *
5352  * [74] PEDef ::= EntityValue | ExternalID
5353  *
5354  * [76] NDataDecl ::= S 'NDATA' S Name
5355  *
5356  * [ VC: Notation Declared ]
5357  * The Name must match the declared name of a notation.
5358  */
5359 
5360 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5361 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5362     const xmlChar *name = NULL;
5363     xmlChar *value = NULL;
5364     xmlChar *URI = NULL, *literal = NULL;
5365     const xmlChar *ndata = NULL;
5366     int isParameter = 0;
5367     xmlChar *orig = NULL;
5368 
5369     /* GROW; done in the caller */
5370     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5371 	int inputid = ctxt->input->id;
5372 	SHRINK;
5373 	SKIP(8);
5374 	if (SKIP_BLANKS == 0) {
5375 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 			   "Space required after '<!ENTITY'\n");
5377 	}
5378 
5379 	if (RAW == '%') {
5380 	    NEXT;
5381 	    if (SKIP_BLANKS == 0) {
5382 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5383 			       "Space required after '%%'\n");
5384 	    }
5385 	    isParameter = 1;
5386 	}
5387 
5388         name = xmlParseName(ctxt);
5389 	if (name == NULL) {
5390 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5391 	                   "xmlParseEntityDecl: no name\n");
5392             return;
5393 	}
5394 	if (xmlStrchr(name, ':') != NULL) {
5395 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396 		     "colons are forbidden from entities names '%s'\n",
5397 		     name, NULL, NULL);
5398 	}
5399 	if (SKIP_BLANKS == 0) {
5400 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5401 			   "Space required after the entity name\n");
5402 	}
5403 
5404 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5405 	/*
5406 	 * handle the various case of definitions...
5407 	 */
5408 	if (isParameter) {
5409 	    if ((RAW == '"') || (RAW == '\'')) {
5410 	        value = xmlParseEntityValue(ctxt, &orig);
5411 		if (value) {
5412 		    if ((ctxt->sax != NULL) &&
5413 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5414 			ctxt->sax->entityDecl(ctxt->userData, name,
5415 		                    XML_INTERNAL_PARAMETER_ENTITY,
5416 				    NULL, NULL, value);
5417 		}
5418 	    } else {
5419 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5420 		if ((URI == NULL) && (literal == NULL)) {
5421 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5422 		}
5423 		if (URI) {
5424 		    xmlURIPtr uri;
5425 
5426 		    uri = xmlParseURI((const char *) URI);
5427 		    if (uri == NULL) {
5428 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5429 				     "Invalid URI: %s\n", URI);
5430 			/*
5431 			 * This really ought to be a well formedness error
5432 			 * but the XML Core WG decided otherwise c.f. issue
5433 			 * E26 of the XML erratas.
5434 			 */
5435 		    } else {
5436 			if (uri->fragment != NULL) {
5437 			    /*
5438 			     * Okay this is foolish to block those but not
5439 			     * invalid URIs.
5440 			     */
5441 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5442 			} else {
5443 			    if ((ctxt->sax != NULL) &&
5444 				(!ctxt->disableSAX) &&
5445 				(ctxt->sax->entityDecl != NULL))
5446 				ctxt->sax->entityDecl(ctxt->userData, name,
5447 					    XML_EXTERNAL_PARAMETER_ENTITY,
5448 					    literal, URI, NULL);
5449 			}
5450 			xmlFreeURI(uri);
5451 		    }
5452 		}
5453 	    }
5454 	} else {
5455 	    if ((RAW == '"') || (RAW == '\'')) {
5456 	        value = xmlParseEntityValue(ctxt, &orig);
5457 		if ((ctxt->sax != NULL) &&
5458 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5459 		    ctxt->sax->entityDecl(ctxt->userData, name,
5460 				XML_INTERNAL_GENERAL_ENTITY,
5461 				NULL, NULL, value);
5462 		/*
5463 		 * For expat compatibility in SAX mode.
5464 		 */
5465 		if ((ctxt->myDoc == NULL) ||
5466 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5467 		    if (ctxt->myDoc == NULL) {
5468 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5469 			if (ctxt->myDoc == NULL) {
5470 			    xmlErrMemory(ctxt, "New Doc failed");
5471 			    return;
5472 			}
5473 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5474 		    }
5475 		    if (ctxt->myDoc->intSubset == NULL)
5476 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5477 					    BAD_CAST "fake", NULL, NULL);
5478 
5479 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5480 			              NULL, NULL, value);
5481 		}
5482 	    } else {
5483 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5484 		if ((URI == NULL) && (literal == NULL)) {
5485 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5486 		}
5487 		if (URI) {
5488 		    xmlURIPtr uri;
5489 
5490 		    uri = xmlParseURI((const char *)URI);
5491 		    if (uri == NULL) {
5492 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5493 				     "Invalid URI: %s\n", URI);
5494 			/*
5495 			 * This really ought to be a well formedness error
5496 			 * but the XML Core WG decided otherwise c.f. issue
5497 			 * E26 of the XML erratas.
5498 			 */
5499 		    } else {
5500 			if (uri->fragment != NULL) {
5501 			    /*
5502 			     * Okay this is foolish to block those but not
5503 			     * invalid URIs.
5504 			     */
5505 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5506 			}
5507 			xmlFreeURI(uri);
5508 		    }
5509 		}
5510 		if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5511 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5512 				   "Space required before 'NDATA'\n");
5513 		}
5514 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5515 		    SKIP(5);
5516 		    if (SKIP_BLANKS == 0) {
5517 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5518 				       "Space required after 'NDATA'\n");
5519 		    }
5520 		    ndata = xmlParseName(ctxt);
5521 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5522 		        (ctxt->sax->unparsedEntityDecl != NULL))
5523 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5524 				    literal, URI, ndata);
5525 		} else {
5526 		    if ((ctxt->sax != NULL) &&
5527 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5528 			ctxt->sax->entityDecl(ctxt->userData, name,
5529 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5530 				    literal, URI, NULL);
5531 		    /*
5532 		     * For expat compatibility in SAX mode.
5533 		     * assuming the entity repalcement was asked for
5534 		     */
5535 		    if ((ctxt->replaceEntities != 0) &&
5536 			((ctxt->myDoc == NULL) ||
5537 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5538 			if (ctxt->myDoc == NULL) {
5539 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5540 			    if (ctxt->myDoc == NULL) {
5541 			        xmlErrMemory(ctxt, "New Doc failed");
5542 				return;
5543 			    }
5544 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5545 			}
5546 
5547 			if (ctxt->myDoc->intSubset == NULL)
5548 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5549 						BAD_CAST "fake", NULL, NULL);
5550 			xmlSAX2EntityDecl(ctxt, name,
5551 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5552 				          literal, URI, NULL);
5553 		    }
5554 		}
5555 	    }
5556 	}
5557 	if (ctxt->instate == XML_PARSER_EOF)
5558 	    goto done;
5559 	SKIP_BLANKS;
5560 	if (RAW != '>') {
5561 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5562 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5563 	    xmlHaltParser(ctxt);
5564 	} else {
5565 	    if (inputid != ctxt->input->id) {
5566 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5567 	                       "Entity declaration doesn't start and stop in"
5568                                " the same entity\n");
5569 	    }
5570 	    NEXT;
5571 	}
5572 	if (orig != NULL) {
5573 	    /*
5574 	     * Ugly mechanism to save the raw entity value.
5575 	     */
5576 	    xmlEntityPtr cur = NULL;
5577 
5578 	    if (isParameter) {
5579 	        if ((ctxt->sax != NULL) &&
5580 		    (ctxt->sax->getParameterEntity != NULL))
5581 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5582 	    } else {
5583 	        if ((ctxt->sax != NULL) &&
5584 		    (ctxt->sax->getEntity != NULL))
5585 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5586 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5587 		    cur = xmlSAX2GetEntity(ctxt, name);
5588 		}
5589 	    }
5590             if ((cur != NULL) && (cur->orig == NULL)) {
5591 		cur->orig = orig;
5592                 orig = NULL;
5593 	    }
5594 	}
5595 
5596 done:
5597 	if (value != NULL) xmlFree(value);
5598 	if (URI != NULL) xmlFree(URI);
5599 	if (literal != NULL) xmlFree(literal);
5600         if (orig != NULL) xmlFree(orig);
5601     }
5602 }
5603 
5604 /**
5605  * xmlParseDefaultDecl:
5606  * @ctxt:  an XML parser context
5607  * @value:  Receive a possible fixed default value for the attribute
5608  *
5609  * Parse an attribute default declaration
5610  *
5611  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5612  *
5613  * [ VC: Required Attribute ]
5614  * if the default declaration is the keyword #REQUIRED, then the
5615  * attribute must be specified for all elements of the type in the
5616  * attribute-list declaration.
5617  *
5618  * [ VC: Attribute Default Legal ]
5619  * The declared default value must meet the lexical constraints of
5620  * the declared attribute type c.f. xmlValidateAttributeDecl()
5621  *
5622  * [ VC: Fixed Attribute Default ]
5623  * if an attribute has a default value declared with the #FIXED
5624  * keyword, instances of that attribute must match the default value.
5625  *
5626  * [ WFC: No < in Attribute Values ]
5627  * handled in xmlParseAttValue()
5628  *
5629  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5630  *          or XML_ATTRIBUTE_FIXED.
5631  */
5632 
5633 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5634 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5635     int val;
5636     xmlChar *ret;
5637 
5638     *value = NULL;
5639     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5640 	SKIP(9);
5641 	return(XML_ATTRIBUTE_REQUIRED);
5642     }
5643     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5644 	SKIP(8);
5645 	return(XML_ATTRIBUTE_IMPLIED);
5646     }
5647     val = XML_ATTRIBUTE_NONE;
5648     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5649 	SKIP(6);
5650 	val = XML_ATTRIBUTE_FIXED;
5651 	if (SKIP_BLANKS == 0) {
5652 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5653 			   "Space required after '#FIXED'\n");
5654 	}
5655     }
5656     ret = xmlParseAttValue(ctxt);
5657     ctxt->instate = XML_PARSER_DTD;
5658     if (ret == NULL) {
5659 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5660 		       "Attribute default value declaration error\n");
5661     } else
5662         *value = ret;
5663     return(val);
5664 }
5665 
5666 /**
5667  * xmlParseNotationType:
5668  * @ctxt:  an XML parser context
5669  *
5670  * parse an Notation attribute type.
5671  *
5672  * Note: the leading 'NOTATION' S part has already being parsed...
5673  *
5674  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5675  *
5676  * [ VC: Notation Attributes ]
5677  * Values of this type must match one of the notation names included
5678  * in the declaration; all notation names in the declaration must be declared.
5679  *
5680  * Returns: the notation attribute tree built while parsing
5681  */
5682 
5683 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5684 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5685     const xmlChar *name;
5686     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5687 
5688     if (RAW != '(') {
5689 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5690 	return(NULL);
5691     }
5692     SHRINK;
5693     do {
5694         NEXT;
5695 	SKIP_BLANKS;
5696         name = xmlParseName(ctxt);
5697 	if (name == NULL) {
5698 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5699 			   "Name expected in NOTATION declaration\n");
5700             xmlFreeEnumeration(ret);
5701 	    return(NULL);
5702 	}
5703 	tmp = ret;
5704 	while (tmp != NULL) {
5705 	    if (xmlStrEqual(name, tmp->name)) {
5706 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5707 	  "standalone: attribute notation value token %s duplicated\n",
5708 				 name, NULL);
5709 		if (!xmlDictOwns(ctxt->dict, name))
5710 		    xmlFree((xmlChar *) name);
5711 		break;
5712 	    }
5713 	    tmp = tmp->next;
5714 	}
5715 	if (tmp == NULL) {
5716 	    cur = xmlCreateEnumeration(name);
5717 	    if (cur == NULL) {
5718                 xmlFreeEnumeration(ret);
5719                 return(NULL);
5720             }
5721 	    if (last == NULL) ret = last = cur;
5722 	    else {
5723 		last->next = cur;
5724 		last = cur;
5725 	    }
5726 	}
5727 	SKIP_BLANKS;
5728     } while (RAW == '|');
5729     if (RAW != ')') {
5730 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5731         xmlFreeEnumeration(ret);
5732 	return(NULL);
5733     }
5734     NEXT;
5735     return(ret);
5736 }
5737 
5738 /**
5739  * xmlParseEnumerationType:
5740  * @ctxt:  an XML parser context
5741  *
5742  * parse an Enumeration attribute type.
5743  *
5744  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5745  *
5746  * [ VC: Enumeration ]
5747  * Values of this type must match one of the Nmtoken tokens in
5748  * the declaration
5749  *
5750  * Returns: the enumeration attribute tree built while parsing
5751  */
5752 
5753 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5754 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5755     xmlChar *name;
5756     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5757 
5758     if (RAW != '(') {
5759 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5760 	return(NULL);
5761     }
5762     SHRINK;
5763     do {
5764         NEXT;
5765 	SKIP_BLANKS;
5766         name = xmlParseNmtoken(ctxt);
5767 	if (name == NULL) {
5768 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5769 	    return(ret);
5770 	}
5771 	tmp = ret;
5772 	while (tmp != NULL) {
5773 	    if (xmlStrEqual(name, tmp->name)) {
5774 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5775 	  "standalone: attribute enumeration value token %s duplicated\n",
5776 				 name, NULL);
5777 		if (!xmlDictOwns(ctxt->dict, name))
5778 		    xmlFree(name);
5779 		break;
5780 	    }
5781 	    tmp = tmp->next;
5782 	}
5783 	if (tmp == NULL) {
5784 	    cur = xmlCreateEnumeration(name);
5785 	    if (!xmlDictOwns(ctxt->dict, name))
5786 		xmlFree(name);
5787 	    if (cur == NULL) {
5788                 xmlFreeEnumeration(ret);
5789                 return(NULL);
5790             }
5791 	    if (last == NULL) ret = last = cur;
5792 	    else {
5793 		last->next = cur;
5794 		last = cur;
5795 	    }
5796 	}
5797 	SKIP_BLANKS;
5798     } while (RAW == '|');
5799     if (RAW != ')') {
5800 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5801 	return(ret);
5802     }
5803     NEXT;
5804     return(ret);
5805 }
5806 
5807 /**
5808  * xmlParseEnumeratedType:
5809  * @ctxt:  an XML parser context
5810  * @tree:  the enumeration tree built while parsing
5811  *
5812  * parse an Enumerated attribute type.
5813  *
5814  * [57] EnumeratedType ::= NotationType | Enumeration
5815  *
5816  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5817  *
5818  *
5819  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5820  */
5821 
5822 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5823 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5824     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5825 	SKIP(8);
5826 	if (SKIP_BLANKS == 0) {
5827 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5828 			   "Space required after 'NOTATION'\n");
5829 	    return(0);
5830 	}
5831 	*tree = xmlParseNotationType(ctxt);
5832 	if (*tree == NULL) return(0);
5833 	return(XML_ATTRIBUTE_NOTATION);
5834     }
5835     *tree = xmlParseEnumerationType(ctxt);
5836     if (*tree == NULL) return(0);
5837     return(XML_ATTRIBUTE_ENUMERATION);
5838 }
5839 
5840 /**
5841  * xmlParseAttributeType:
5842  * @ctxt:  an XML parser context
5843  * @tree:  the enumeration tree built while parsing
5844  *
5845  * parse the Attribute list def for an element
5846  *
5847  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5848  *
5849  * [55] StringType ::= 'CDATA'
5850  *
5851  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5852  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5853  *
5854  * Validity constraints for attribute values syntax are checked in
5855  * xmlValidateAttributeValue()
5856  *
5857  * [ VC: ID ]
5858  * Values of type ID must match the Name production. A name must not
5859  * appear more than once in an XML document as a value of this type;
5860  * i.e., ID values must uniquely identify the elements which bear them.
5861  *
5862  * [ VC: One ID per Element Type ]
5863  * No element type may have more than one ID attribute specified.
5864  *
5865  * [ VC: ID Attribute Default ]
5866  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5867  *
5868  * [ VC: IDREF ]
5869  * Values of type IDREF must match the Name production, and values
5870  * of type IDREFS must match Names; each IDREF Name must match the value
5871  * of an ID attribute on some element in the XML document; i.e. IDREF
5872  * values must match the value of some ID attribute.
5873  *
5874  * [ VC: Entity Name ]
5875  * Values of type ENTITY must match the Name production, values
5876  * of type ENTITIES must match Names; each Entity Name must match the
5877  * name of an unparsed entity declared in the DTD.
5878  *
5879  * [ VC: Name Token ]
5880  * Values of type NMTOKEN must match the Nmtoken production; values
5881  * of type NMTOKENS must match Nmtokens.
5882  *
5883  * Returns the attribute type
5884  */
5885 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5886 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5887     SHRINK;
5888     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5889 	SKIP(5);
5890 	return(XML_ATTRIBUTE_CDATA);
5891      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5892 	SKIP(6);
5893 	return(XML_ATTRIBUTE_IDREFS);
5894      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5895 	SKIP(5);
5896 	return(XML_ATTRIBUTE_IDREF);
5897      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5898         SKIP(2);
5899 	return(XML_ATTRIBUTE_ID);
5900      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5901 	SKIP(6);
5902 	return(XML_ATTRIBUTE_ENTITY);
5903      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5904 	SKIP(8);
5905 	return(XML_ATTRIBUTE_ENTITIES);
5906      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5907 	SKIP(8);
5908 	return(XML_ATTRIBUTE_NMTOKENS);
5909      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5910 	SKIP(7);
5911 	return(XML_ATTRIBUTE_NMTOKEN);
5912      }
5913      return(xmlParseEnumeratedType(ctxt, tree));
5914 }
5915 
5916 /**
5917  * xmlParseAttributeListDecl:
5918  * @ctxt:  an XML parser context
5919  *
5920  * : parse the Attribute list def for an element
5921  *
5922  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5923  *
5924  * [53] AttDef ::= S Name S AttType S DefaultDecl
5925  *
5926  */
5927 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5928 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5929     const xmlChar *elemName;
5930     const xmlChar *attrName;
5931     xmlEnumerationPtr tree;
5932 
5933     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5934 	int inputid = ctxt->input->id;
5935 
5936 	SKIP(9);
5937 	if (SKIP_BLANKS == 0) {
5938 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5939 		                 "Space required after '<!ATTLIST'\n");
5940 	}
5941         elemName = xmlParseName(ctxt);
5942 	if (elemName == NULL) {
5943 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5944 			   "ATTLIST: no name for Element\n");
5945 	    return;
5946 	}
5947 	SKIP_BLANKS;
5948 	GROW;
5949 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5950 	    int type;
5951 	    int def;
5952 	    xmlChar *defaultValue = NULL;
5953 
5954 	    GROW;
5955             tree = NULL;
5956 	    attrName = xmlParseName(ctxt);
5957 	    if (attrName == NULL) {
5958 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5959 			       "ATTLIST: no name for Attribute\n");
5960 		break;
5961 	    }
5962 	    GROW;
5963 	    if (SKIP_BLANKS == 0) {
5964 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5965 		        "Space required after the attribute name\n");
5966 		break;
5967 	    }
5968 
5969 	    type = xmlParseAttributeType(ctxt, &tree);
5970 	    if (type <= 0) {
5971 	        break;
5972 	    }
5973 
5974 	    GROW;
5975 	    if (SKIP_BLANKS == 0) {
5976 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5977 			       "Space required after the attribute type\n");
5978 	        if (tree != NULL)
5979 		    xmlFreeEnumeration(tree);
5980 		break;
5981 	    }
5982 
5983 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5984 	    if (def <= 0) {
5985                 if (defaultValue != NULL)
5986 		    xmlFree(defaultValue);
5987 	        if (tree != NULL)
5988 		    xmlFreeEnumeration(tree);
5989 	        break;
5990 	    }
5991 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5992 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5993 
5994 	    GROW;
5995             if (RAW != '>') {
5996 		if (SKIP_BLANKS == 0) {
5997 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5998 			"Space required after the attribute default value\n");
5999 		    if (defaultValue != NULL)
6000 			xmlFree(defaultValue);
6001 		    if (tree != NULL)
6002 			xmlFreeEnumeration(tree);
6003 		    break;
6004 		}
6005 	    }
6006 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6007 		(ctxt->sax->attributeDecl != NULL))
6008 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6009 	                        type, def, defaultValue, tree);
6010 	    else if (tree != NULL)
6011 		xmlFreeEnumeration(tree);
6012 
6013 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6014 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6015 		(def != XML_ATTRIBUTE_REQUIRED)) {
6016 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6017 	    }
6018 	    if (ctxt->sax2) {
6019 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6020 	    }
6021 	    if (defaultValue != NULL)
6022 	        xmlFree(defaultValue);
6023 	    GROW;
6024 	}
6025 	if (RAW == '>') {
6026 	    if (inputid != ctxt->input->id) {
6027 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6028                                "Attribute list declaration doesn't start and"
6029                                " stop in the same entity\n");
6030 	    }
6031 	    NEXT;
6032 	}
6033     }
6034 }
6035 
6036 /**
6037  * xmlParseElementMixedContentDecl:
6038  * @ctxt:  an XML parser context
6039  * @inputchk:  the input used for the current entity, needed for boundary checks
6040  *
6041  * parse the declaration for a Mixed Element content
6042  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6043  *
6044  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6045  *                '(' S? '#PCDATA' S? ')'
6046  *
6047  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6048  *
6049  * [ VC: No Duplicate Types ]
6050  * The same name must not appear more than once in a single
6051  * mixed-content declaration.
6052  *
6053  * returns: the list of the xmlElementContentPtr describing the element choices
6054  */
6055 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6056 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6057     xmlElementContentPtr ret = NULL, cur = NULL, n;
6058     const xmlChar *elem = NULL;
6059 
6060     GROW;
6061     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6062 	SKIP(7);
6063 	SKIP_BLANKS;
6064 	SHRINK;
6065 	if (RAW == ')') {
6066 	    if (ctxt->input->id != inputchk) {
6067 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6068                                "Element content declaration doesn't start and"
6069                                " stop in the same entity\n");
6070 	    }
6071 	    NEXT;
6072 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6073 	    if (ret == NULL)
6074 	        return(NULL);
6075 	    if (RAW == '*') {
6076 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6077 		NEXT;
6078 	    }
6079 	    return(ret);
6080 	}
6081 	if ((RAW == '(') || (RAW == '|')) {
6082 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6083 	    if (ret == NULL) return(NULL);
6084 	}
6085 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6086 	    NEXT;
6087 	    if (elem == NULL) {
6088 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6089 		if (ret == NULL) return(NULL);
6090 		ret->c1 = cur;
6091 		if (cur != NULL)
6092 		    cur->parent = ret;
6093 		cur = ret;
6094 	    } else {
6095 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6096 		if (n == NULL) return(NULL);
6097 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6098 		if (n->c1 != NULL)
6099 		    n->c1->parent = n;
6100 	        cur->c2 = n;
6101 		if (n != NULL)
6102 		    n->parent = cur;
6103 		cur = n;
6104 	    }
6105 	    SKIP_BLANKS;
6106 	    elem = xmlParseName(ctxt);
6107 	    if (elem == NULL) {
6108 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6109 			"xmlParseElementMixedContentDecl : Name expected\n");
6110 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6111 		return(NULL);
6112 	    }
6113 	    SKIP_BLANKS;
6114 	    GROW;
6115 	}
6116 	if ((RAW == ')') && (NXT(1) == '*')) {
6117 	    if (elem != NULL) {
6118 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6119 		                               XML_ELEMENT_CONTENT_ELEMENT);
6120 		if (cur->c2 != NULL)
6121 		    cur->c2->parent = cur;
6122             }
6123             if (ret != NULL)
6124                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 	    if (ctxt->input->id != inputchk) {
6126 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6127                                "Element content declaration doesn't start and"
6128                                " stop in the same entity\n");
6129 	    }
6130 	    SKIP(2);
6131 	} else {
6132 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6133 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6134 	    return(NULL);
6135 	}
6136 
6137     } else {
6138 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6139     }
6140     return(ret);
6141 }
6142 
6143 /**
6144  * xmlParseElementChildrenContentDeclPriv:
6145  * @ctxt:  an XML parser context
6146  * @inputchk:  the input used for the current entity, needed for boundary checks
6147  * @depth: the level of recursion
6148  *
6149  * parse the declaration for a Mixed Element content
6150  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6151  *
6152  *
6153  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6154  *
6155  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6156  *
6157  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6158  *
6159  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6160  *
6161  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6162  * TODO Parameter-entity replacement text must be properly nested
6163  *	with parenthesized groups. That is to say, if either of the
6164  *	opening or closing parentheses in a choice, seq, or Mixed
6165  *	construct is contained in the replacement text for a parameter
6166  *	entity, both must be contained in the same replacement text. For
6167  *	interoperability, if a parameter-entity reference appears in a
6168  *	choice, seq, or Mixed construct, its replacement text should not
6169  *	be empty, and neither the first nor last non-blank character of
6170  *	the replacement text should be a connector (| or ,).
6171  *
6172  * Returns the tree of xmlElementContentPtr describing the element
6173  *          hierarchy.
6174  */
6175 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6176 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6177                                        int depth) {
6178     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6179     const xmlChar *elem;
6180     xmlChar type = 0;
6181 
6182     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6183         (depth >  2048)) {
6184         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6185 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6186                           depth);
6187 	return(NULL);
6188     }
6189     SKIP_BLANKS;
6190     GROW;
6191     if (RAW == '(') {
6192 	int inputid = ctxt->input->id;
6193 
6194         /* Recurse on first child */
6195 	NEXT;
6196 	SKIP_BLANKS;
6197         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6198                                                            depth + 1);
6199 	SKIP_BLANKS;
6200 	GROW;
6201     } else {
6202 	elem = xmlParseName(ctxt);
6203 	if (elem == NULL) {
6204 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6205 	    return(NULL);
6206 	}
6207         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6208 	if (cur == NULL) {
6209 	    xmlErrMemory(ctxt, NULL);
6210 	    return(NULL);
6211 	}
6212 	GROW;
6213 	if (RAW == '?') {
6214 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6215 	    NEXT;
6216 	} else if (RAW == '*') {
6217 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6218 	    NEXT;
6219 	} else if (RAW == '+') {
6220 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6221 	    NEXT;
6222 	} else {
6223 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6224 	}
6225 	GROW;
6226     }
6227     SKIP_BLANKS;
6228     SHRINK;
6229     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6230         /*
6231 	 * Each loop we parse one separator and one element.
6232 	 */
6233         if (RAW == ',') {
6234 	    if (type == 0) type = CUR;
6235 
6236 	    /*
6237 	     * Detect "Name | Name , Name" error
6238 	     */
6239 	    else if (type != CUR) {
6240 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6241 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6242 		                  type);
6243 		if ((last != NULL) && (last != ret))
6244 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6245 		if (ret != NULL)
6246 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6247 		return(NULL);
6248 	    }
6249 	    NEXT;
6250 
6251 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6252 	    if (op == NULL) {
6253 		if ((last != NULL) && (last != ret))
6254 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6255 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6256 		return(NULL);
6257 	    }
6258 	    if (last == NULL) {
6259 		op->c1 = ret;
6260 		if (ret != NULL)
6261 		    ret->parent = op;
6262 		ret = cur = op;
6263 	    } else {
6264 	        cur->c2 = op;
6265 		if (op != NULL)
6266 		    op->parent = cur;
6267 		op->c1 = last;
6268 		if (last != NULL)
6269 		    last->parent = op;
6270 		cur =op;
6271 		last = NULL;
6272 	    }
6273 	} else if (RAW == '|') {
6274 	    if (type == 0) type = CUR;
6275 
6276 	    /*
6277 	     * Detect "Name , Name | Name" error
6278 	     */
6279 	    else if (type != CUR) {
6280 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6281 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6282 				  type);
6283 		if ((last != NULL) && (last != ret))
6284 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6285 		if (ret != NULL)
6286 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6287 		return(NULL);
6288 	    }
6289 	    NEXT;
6290 
6291 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6292 	    if (op == NULL) {
6293 		if ((last != NULL) && (last != ret))
6294 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6295 		if (ret != NULL)
6296 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6297 		return(NULL);
6298 	    }
6299 	    if (last == NULL) {
6300 		op->c1 = ret;
6301 		if (ret != NULL)
6302 		    ret->parent = op;
6303 		ret = cur = op;
6304 	    } else {
6305 	        cur->c2 = op;
6306 		if (op != NULL)
6307 		    op->parent = cur;
6308 		op->c1 = last;
6309 		if (last != NULL)
6310 		    last->parent = op;
6311 		cur =op;
6312 		last = NULL;
6313 	    }
6314 	} else {
6315 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6316 	    if ((last != NULL) && (last != ret))
6317 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6318 	    if (ret != NULL)
6319 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6320 	    return(NULL);
6321 	}
6322 	GROW;
6323 	SKIP_BLANKS;
6324 	GROW;
6325 	if (RAW == '(') {
6326 	    int inputid = ctxt->input->id;
6327 	    /* Recurse on second child */
6328 	    NEXT;
6329 	    SKIP_BLANKS;
6330 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6331                                                           depth + 1);
6332 	    SKIP_BLANKS;
6333 	} else {
6334 	    elem = xmlParseName(ctxt);
6335 	    if (elem == NULL) {
6336 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6337 		if (ret != NULL)
6338 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6339 		return(NULL);
6340 	    }
6341 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6342 	    if (last == NULL) {
6343 		if (ret != NULL)
6344 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6345 		return(NULL);
6346 	    }
6347 	    if (RAW == '?') {
6348 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6349 		NEXT;
6350 	    } else if (RAW == '*') {
6351 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6352 		NEXT;
6353 	    } else if (RAW == '+') {
6354 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6355 		NEXT;
6356 	    } else {
6357 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6358 	    }
6359 	}
6360 	SKIP_BLANKS;
6361 	GROW;
6362     }
6363     if ((cur != NULL) && (last != NULL)) {
6364         cur->c2 = last;
6365 	if (last != NULL)
6366 	    last->parent = cur;
6367     }
6368     if (ctxt->input->id != inputchk) {
6369 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6370                        "Element content declaration doesn't start and stop in"
6371                        " the same entity\n");
6372     }
6373     NEXT;
6374     if (RAW == '?') {
6375 	if (ret != NULL) {
6376 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6377 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6378 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6379 	    else
6380 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6381 	}
6382 	NEXT;
6383     } else if (RAW == '*') {
6384 	if (ret != NULL) {
6385 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6386 	    cur = ret;
6387 	    /*
6388 	     * Some normalization:
6389 	     * (a | b* | c?)* == (a | b | c)*
6390 	     */
6391 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6392 		if ((cur->c1 != NULL) &&
6393 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6394 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6395 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6396 		if ((cur->c2 != NULL) &&
6397 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6399 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 		cur = cur->c2;
6401 	    }
6402 	}
6403 	NEXT;
6404     } else if (RAW == '+') {
6405 	if (ret != NULL) {
6406 	    int found = 0;
6407 
6408 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6409 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6410 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411 	    else
6412 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6413 	    /*
6414 	     * Some normalization:
6415 	     * (a | b*)+ == (a | b)*
6416 	     * (a | b?)+ == (a | b)*
6417 	     */
6418 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6419 		if ((cur->c1 != NULL) &&
6420 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6421 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6422 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6423 		    found = 1;
6424 		}
6425 		if ((cur->c2 != NULL) &&
6426 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6427 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6428 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6429 		    found = 1;
6430 		}
6431 		cur = cur->c2;
6432 	    }
6433 	    if (found)
6434 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6435 	}
6436 	NEXT;
6437     }
6438     return(ret);
6439 }
6440 
6441 /**
6442  * xmlParseElementChildrenContentDecl:
6443  * @ctxt:  an XML parser context
6444  * @inputchk:  the input used for the current entity, needed for boundary checks
6445  *
6446  * parse the declaration for a Mixed Element content
6447  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6448  *
6449  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6450  *
6451  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6452  *
6453  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6454  *
6455  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6456  *
6457  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6458  * TODO Parameter-entity replacement text must be properly nested
6459  *	with parenthesized groups. That is to say, if either of the
6460  *	opening or closing parentheses in a choice, seq, or Mixed
6461  *	construct is contained in the replacement text for a parameter
6462  *	entity, both must be contained in the same replacement text. For
6463  *	interoperability, if a parameter-entity reference appears in a
6464  *	choice, seq, or Mixed construct, its replacement text should not
6465  *	be empty, and neither the first nor last non-blank character of
6466  *	the replacement text should be a connector (| or ,).
6467  *
6468  * Returns the tree of xmlElementContentPtr describing the element
6469  *          hierarchy.
6470  */
6471 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6472 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6473     /* stub left for API/ABI compat */
6474     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6475 }
6476 
6477 /**
6478  * xmlParseElementContentDecl:
6479  * @ctxt:  an XML parser context
6480  * @name:  the name of the element being defined.
6481  * @result:  the Element Content pointer will be stored here if any
6482  *
6483  * parse the declaration for an Element content either Mixed or Children,
6484  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6485  *
6486  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6487  *
6488  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6489  */
6490 
6491 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6492 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6493                            xmlElementContentPtr *result) {
6494 
6495     xmlElementContentPtr tree = NULL;
6496     int inputid = ctxt->input->id;
6497     int res;
6498 
6499     *result = NULL;
6500 
6501     if (RAW != '(') {
6502 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6503 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6504 	return(-1);
6505     }
6506     NEXT;
6507     GROW;
6508     if (ctxt->instate == XML_PARSER_EOF)
6509         return(-1);
6510     SKIP_BLANKS;
6511     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6512         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6513 	res = XML_ELEMENT_TYPE_MIXED;
6514     } else {
6515         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6516 	res = XML_ELEMENT_TYPE_ELEMENT;
6517     }
6518     SKIP_BLANKS;
6519     *result = tree;
6520     return(res);
6521 }
6522 
6523 /**
6524  * xmlParseElementDecl:
6525  * @ctxt:  an XML parser context
6526  *
6527  * parse an Element declaration.
6528  *
6529  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6530  *
6531  * [ VC: Unique Element Type Declaration ]
6532  * No element type may be declared more than once
6533  *
6534  * Returns the type of the element, or -1 in case of error
6535  */
6536 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6537 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6538     const xmlChar *name;
6539     int ret = -1;
6540     xmlElementContentPtr content  = NULL;
6541 
6542     /* GROW; done in the caller */
6543     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6544 	int inputid = ctxt->input->id;
6545 
6546 	SKIP(9);
6547 	if (SKIP_BLANKS == 0) {
6548 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6549 		           "Space required after 'ELEMENT'\n");
6550 	    return(-1);
6551 	}
6552         name = xmlParseName(ctxt);
6553 	if (name == NULL) {
6554 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6555 			   "xmlParseElementDecl: no name for Element\n");
6556 	    return(-1);
6557 	}
6558 	if (SKIP_BLANKS == 0) {
6559 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6560 			   "Space required after the element name\n");
6561 	}
6562 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6563 	    SKIP(5);
6564 	    /*
6565 	     * Element must always be empty.
6566 	     */
6567 	    ret = XML_ELEMENT_TYPE_EMPTY;
6568 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6569 	           (NXT(2) == 'Y')) {
6570 	    SKIP(3);
6571 	    /*
6572 	     * Element is a generic container.
6573 	     */
6574 	    ret = XML_ELEMENT_TYPE_ANY;
6575 	} else if (RAW == '(') {
6576 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6577 	} else {
6578 	    /*
6579 	     * [ WFC: PEs in Internal Subset ] error handling.
6580 	     */
6581 	    if ((RAW == '%') && (ctxt->external == 0) &&
6582 	        (ctxt->inputNr == 1)) {
6583 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6584 	  "PEReference: forbidden within markup decl in internal subset\n");
6585 	    } else {
6586 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6587 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6588             }
6589 	    return(-1);
6590 	}
6591 
6592 	SKIP_BLANKS;
6593 
6594 	if (RAW != '>') {
6595 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6596 	    if (content != NULL) {
6597 		xmlFreeDocElementContent(ctxt->myDoc, content);
6598 	    }
6599 	} else {
6600 	    if (inputid != ctxt->input->id) {
6601 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6602                                "Element declaration doesn't start and stop in"
6603                                " the same entity\n");
6604 	    }
6605 
6606 	    NEXT;
6607 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6608 		(ctxt->sax->elementDecl != NULL)) {
6609 		if (content != NULL)
6610 		    content->parent = NULL;
6611 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6612 		                       content);
6613 		if ((content != NULL) && (content->parent == NULL)) {
6614 		    /*
6615 		     * this is a trick: if xmlAddElementDecl is called,
6616 		     * instead of copying the full tree it is plugged directly
6617 		     * if called from the parser. Avoid duplicating the
6618 		     * interfaces or change the API/ABI
6619 		     */
6620 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6621 		}
6622 	    } else if (content != NULL) {
6623 		xmlFreeDocElementContent(ctxt->myDoc, content);
6624 	    }
6625 	}
6626     }
6627     return(ret);
6628 }
6629 
6630 /**
6631  * xmlParseConditionalSections
6632  * @ctxt:  an XML parser context
6633  *
6634  * [61] conditionalSect ::= includeSect | ignoreSect
6635  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6636  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6637  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6638  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6639  */
6640 
6641 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6642 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6643     int id = ctxt->input->id;
6644 
6645     SKIP(3);
6646     SKIP_BLANKS;
6647     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6648 	SKIP(7);
6649 	SKIP_BLANKS;
6650 	if (RAW != '[') {
6651 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6652 	    xmlHaltParser(ctxt);
6653 	    return;
6654 	} else {
6655 	    if (ctxt->input->id != id) {
6656 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657 	                       "All markup of the conditional section is not"
6658                                " in the same entity\n");
6659 	    }
6660 	    NEXT;
6661 	}
6662 	if (xmlParserDebugEntities) {
6663 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6664 		xmlGenericError(xmlGenericErrorContext,
6665 			"%s(%d): ", ctxt->input->filename,
6666 			ctxt->input->line);
6667 	    xmlGenericError(xmlGenericErrorContext,
6668 		    "Entering INCLUDE Conditional Section\n");
6669 	}
6670 
6671         SKIP_BLANKS;
6672         GROW;
6673 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6674 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6675 	    const xmlChar *check = CUR_PTR;
6676 	    unsigned int cons = ctxt->input->consumed;
6677 
6678 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6679 		xmlParseConditionalSections(ctxt);
6680 	    } else
6681 		xmlParseMarkupDecl(ctxt);
6682 
6683             SKIP_BLANKS;
6684             GROW;
6685 
6686 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6687 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6688 		xmlHaltParser(ctxt);
6689 		break;
6690 	    }
6691 	}
6692 	if (xmlParserDebugEntities) {
6693 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6694 		xmlGenericError(xmlGenericErrorContext,
6695 			"%s(%d): ", ctxt->input->filename,
6696 			ctxt->input->line);
6697 	    xmlGenericError(xmlGenericErrorContext,
6698 		    "Leaving INCLUDE Conditional Section\n");
6699 	}
6700 
6701     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6702 	int state;
6703 	xmlParserInputState instate;
6704 	int depth = 0;
6705 
6706 	SKIP(6);
6707 	SKIP_BLANKS;
6708 	if (RAW != '[') {
6709 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6710 	    xmlHaltParser(ctxt);
6711 	    return;
6712 	} else {
6713 	    if (ctxt->input->id != id) {
6714 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6715 	                       "All markup of the conditional section is not"
6716                                " in the same entity\n");
6717 	    }
6718 	    NEXT;
6719 	}
6720 	if (xmlParserDebugEntities) {
6721 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6722 		xmlGenericError(xmlGenericErrorContext,
6723 			"%s(%d): ", ctxt->input->filename,
6724 			ctxt->input->line);
6725 	    xmlGenericError(xmlGenericErrorContext,
6726 		    "Entering IGNORE Conditional Section\n");
6727 	}
6728 
6729 	/*
6730 	 * Parse up to the end of the conditional section
6731 	 * But disable SAX event generating DTD building in the meantime
6732 	 */
6733 	state = ctxt->disableSAX;
6734 	instate = ctxt->instate;
6735 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6736 	ctxt->instate = XML_PARSER_IGNORE;
6737 
6738 	while (((depth >= 0) && (RAW != 0)) &&
6739                (ctxt->instate != XML_PARSER_EOF)) {
6740 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6741 	    depth++;
6742 	    SKIP(3);
6743 	    continue;
6744 	  }
6745 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6746 	    if (--depth >= 0) SKIP(3);
6747 	    continue;
6748 	  }
6749 	  NEXT;
6750 	  continue;
6751 	}
6752 
6753 	ctxt->disableSAX = state;
6754 	ctxt->instate = instate;
6755 
6756 	if (xmlParserDebugEntities) {
6757 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6758 		xmlGenericError(xmlGenericErrorContext,
6759 			"%s(%d): ", ctxt->input->filename,
6760 			ctxt->input->line);
6761 	    xmlGenericError(xmlGenericErrorContext,
6762 		    "Leaving IGNORE Conditional Section\n");
6763 	}
6764 
6765     } else {
6766 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6767 	xmlHaltParser(ctxt);
6768 	return;
6769     }
6770 
6771     if (RAW == 0)
6772         SHRINK;
6773 
6774     if (RAW == 0) {
6775 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6776     } else {
6777 	if (ctxt->input->id != id) {
6778 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6779 	                   "All markup of the conditional section is not in"
6780                            " the same entity\n");
6781 	}
6782 	if ((ctxt-> instate != XML_PARSER_EOF) &&
6783 	    ((ctxt->input->cur + 3) <= ctxt->input->end))
6784 	    SKIP(3);
6785     }
6786 }
6787 
6788 /**
6789  * xmlParseMarkupDecl:
6790  * @ctxt:  an XML parser context
6791  *
6792  * parse Markup declarations
6793  *
6794  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6795  *                     NotationDecl | PI | Comment
6796  *
6797  * [ VC: Proper Declaration/PE Nesting ]
6798  * Parameter-entity replacement text must be properly nested with
6799  * markup declarations. That is to say, if either the first character
6800  * or the last character of a markup declaration (markupdecl above) is
6801  * contained in the replacement text for a parameter-entity reference,
6802  * both must be contained in the same replacement text.
6803  *
6804  * [ WFC: PEs in Internal Subset ]
6805  * In the internal DTD subset, parameter-entity references can occur
6806  * only where markup declarations can occur, not within markup declarations.
6807  * (This does not apply to references that occur in external parameter
6808  * entities or to the external subset.)
6809  */
6810 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6811 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6812     GROW;
6813     if (CUR == '<') {
6814         if (NXT(1) == '!') {
6815 	    switch (NXT(2)) {
6816 	        case 'E':
6817 		    if (NXT(3) == 'L')
6818 			xmlParseElementDecl(ctxt);
6819 		    else if (NXT(3) == 'N')
6820 			xmlParseEntityDecl(ctxt);
6821 		    break;
6822 	        case 'A':
6823 		    xmlParseAttributeListDecl(ctxt);
6824 		    break;
6825 	        case 'N':
6826 		    xmlParseNotationDecl(ctxt);
6827 		    break;
6828 	        case '-':
6829 		    xmlParseComment(ctxt);
6830 		    break;
6831 		default:
6832 		    /* there is an error but it will be detected later */
6833 		    break;
6834 	    }
6835 	} else if (NXT(1) == '?') {
6836 	    xmlParsePI(ctxt);
6837 	}
6838     }
6839 
6840     /*
6841      * detect requirement to exit there and act accordingly
6842      * and avoid having instate overriden later on
6843      */
6844     if (ctxt->instate == XML_PARSER_EOF)
6845         return;
6846 
6847     /*
6848      * Conditional sections are allowed from entities included
6849      * by PE References in the internal subset.
6850      */
6851     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6852         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6853 	    xmlParseConditionalSections(ctxt);
6854 	}
6855     }
6856 
6857     ctxt->instate = XML_PARSER_DTD;
6858 }
6859 
6860 /**
6861  * xmlParseTextDecl:
6862  * @ctxt:  an XML parser context
6863  *
6864  * parse an XML declaration header for external entities
6865  *
6866  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6867  */
6868 
6869 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6870 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6871     xmlChar *version;
6872     const xmlChar *encoding;
6873 
6874     /*
6875      * We know that '<?xml' is here.
6876      */
6877     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6878 	SKIP(5);
6879     } else {
6880 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6881 	return;
6882     }
6883 
6884     if (SKIP_BLANKS == 0) {
6885 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6886 		       "Space needed after '<?xml'\n");
6887     }
6888 
6889     /*
6890      * We may have the VersionInfo here.
6891      */
6892     version = xmlParseVersionInfo(ctxt);
6893     if (version == NULL)
6894 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6895     else {
6896 	if (SKIP_BLANKS == 0) {
6897 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6898 		           "Space needed here\n");
6899 	}
6900     }
6901     ctxt->input->version = version;
6902 
6903     /*
6904      * We must have the encoding declaration
6905      */
6906     encoding = xmlParseEncodingDecl(ctxt);
6907     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6908 	/*
6909 	 * The XML REC instructs us to stop parsing right here
6910 	 */
6911         return;
6912     }
6913     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6914 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6915 		       "Missing encoding in text declaration\n");
6916     }
6917 
6918     SKIP_BLANKS;
6919     if ((RAW == '?') && (NXT(1) == '>')) {
6920         SKIP(2);
6921     } else if (RAW == '>') {
6922         /* Deprecated old WD ... */
6923 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6924 	NEXT;
6925     } else {
6926 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6927 	MOVETO_ENDTAG(CUR_PTR);
6928 	NEXT;
6929     }
6930 }
6931 
6932 /**
6933  * xmlParseExternalSubset:
6934  * @ctxt:  an XML parser context
6935  * @ExternalID: the external identifier
6936  * @SystemID: the system identifier (or URL)
6937  *
6938  * parse Markup declarations from an external subset
6939  *
6940  * [30] extSubset ::= textDecl? extSubsetDecl
6941  *
6942  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6943  */
6944 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6945 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6946                        const xmlChar *SystemID) {
6947     xmlDetectSAX2(ctxt);
6948     GROW;
6949 
6950     if ((ctxt->encoding == NULL) &&
6951         (ctxt->input->end - ctxt->input->cur >= 4)) {
6952         xmlChar start[4];
6953 	xmlCharEncoding enc;
6954 
6955 	start[0] = RAW;
6956 	start[1] = NXT(1);
6957 	start[2] = NXT(2);
6958 	start[3] = NXT(3);
6959 	enc = xmlDetectCharEncoding(start, 4);
6960 	if (enc != XML_CHAR_ENCODING_NONE)
6961 	    xmlSwitchEncoding(ctxt, enc);
6962     }
6963 
6964     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6965 	xmlParseTextDecl(ctxt);
6966 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6967 	    /*
6968 	     * The XML REC instructs us to stop parsing right here
6969 	     */
6970 	    xmlHaltParser(ctxt);
6971 	    return;
6972 	}
6973     }
6974     if (ctxt->myDoc == NULL) {
6975         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6976 	if (ctxt->myDoc == NULL) {
6977 	    xmlErrMemory(ctxt, "New Doc failed");
6978 	    return;
6979 	}
6980 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6981     }
6982     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6983         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6984 
6985     ctxt->instate = XML_PARSER_DTD;
6986     ctxt->external = 1;
6987     SKIP_BLANKS;
6988     while (((RAW == '<') && (NXT(1) == '?')) ||
6989            ((RAW == '<') && (NXT(1) == '!')) ||
6990 	   (RAW == '%')) {
6991 	const xmlChar *check = CUR_PTR;
6992 	unsigned int cons = ctxt->input->consumed;
6993 
6994 	GROW;
6995         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6996 	    xmlParseConditionalSections(ctxt);
6997 	} else
6998 	    xmlParseMarkupDecl(ctxt);
6999         SKIP_BLANKS;
7000 
7001 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7002 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7003 	    break;
7004 	}
7005     }
7006 
7007     if (RAW != 0) {
7008 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7009     }
7010 
7011 }
7012 
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * The function returns immediately unless the current character is '&'.
 * Three cases are then handled in turn:
 *  - a CharRef ('&#...;') is decoded and delivered through characters(),
 *    or through reference() when the parser does not work on UTF-8
 *    buffers and the value does not fit in 8 bits;
 *  - a predefined entity has its content delivered through characters();
 *  - any other entity gets its content parsed when the conditions of the
 *    first-reference block below are met, then is either reported through
 *    the reference() callback or has its node list inserted/copied under
 *    ctxt->node.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    /* value of ent->checked before this call possibly updates it */
    int was_checked;
    /* node list built by the first-reference parse, if any */
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
	int i = 0;
	xmlChar out[12];
	/*
	 * Character following '&#', read before xmlParseCharRef() is
	 * called; used below to re-emit the reference in hexadecimal
	 * when it was written as '&#x...'.
	 */
	int hex = NXT(2);
	int value = xmlParseCharRef(ctxt);

	/* NOTE(review): 0 presumably signals a failed parse - confirm */
	if (value == 0)
	    return;
	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
	    /*
	     * So we are using non-UTF-8 buffers
	     * Check that the char fit on 8bits, if not
	     * generate a CharRef.
	     */
	    if (value <= 0xFF) {
		out[0] = value;
		out[1] = 0;
		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->characters(ctxt->userData, out, 1);
	    } else {
		/* rebuild the reference text without the '&' and ';' */
		if ((hex == 'x') || (hex == 'X'))
		    snprintf((char *)out, sizeof(out), "#x%X", value);
		else
		    snprintf((char *)out, sizeof(out), "#%d", value);
		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
		    (!ctxt->disableSAX))
		    ctxt->sax->reference(ctxt->userData, out);
	    }
	} else {
	    /*
	     * Just encode the value in UTF-8
	     */
	    COPY_BUF(0 ,out, i, value);
	    out[i] = 0;
	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
		(!ctxt->disableSAX))
		ctxt->sax->characters(ctxt->userData, out, i);
	}
	return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    /* no entity processing once the document is not well-formed */
    if (!ctxt->wellFormed)
	return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
	val = ent->content;
	if (val == NULL) return;
	/*
	 * inline the entity.
	 */
	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
	    (!ctxt->disableSAX))
	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
	return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     *
     * The block is also entered again when the entity was checked but
     * has no children and XML_PARSE_NOENT is set.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
	/* counter value before parsing, to measure the entity's own cost */
	unsigned long oldnbent = ctxt->nbentities;

	/*
	 * This is a bit hackish but this seems the best
	 * way to make sure both SAX and DOM entity support
	 * behaves okay.
	 */
	void *user_data;
	if (ctxt->userData == ctxt)
	    user_data = NULL;
	else
	    user_data = ctxt->userData;

	/*
	 * Check that this entity is well formed
	 * 4.3.2: An internal general parsed entity is well-formed
	 * if its replacement text matches the production labeled
	 * content.
	 */
	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
	                                              user_data, &list);
	    ctxt->depth--;

	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
	    ctxt->depth++;
	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
	                                   user_data, ctxt->depth, ent->URI,
					   ent->ExternalID, &list);
	    ctxt->depth--;
	} else {
	    ret = XML_ERR_ENTITY_PE_INTERNAL;
	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			 "invalid entity type found\n", NULL);
	}

	/*
	 * Store the number of entities needing parsing for this entity
	 * content and do checkings
	 *
	 * The count is stored doubled so that the low bit stays free:
	 * it is set when the entity content contains a '<'.
	 */
	ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
	if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
	    ent->checked |= 1;
	if (ret == XML_ERR_ENTITY_LOOP) {
	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	    xmlFreeNodeList(list);
	    return;
	}
	if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
	    xmlFreeNodeList(list);
	    return;
	}

	if ((ret == XML_ERR_OK) && (list != NULL)) {
	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
		(ent->children == NULL)) {
		/* hand the freshly parsed nodes over to the entity */
		ent->children = list;
		if (ctxt->replaceEntities) {
		    /*
		     * Prune it directly in the generated document
		     * except for single text nodes.
		     */
		    if (((list->type == XML_TEXT_NODE) &&
			 (list->next == NULL)) ||
			(ctxt->parseMode == XML_PARSE_READER)) {
			/*
			 * The entity keeps the nodes; list is cleared so
			 * that the insertion code below works on a copy.
			 */
			list->parent = (xmlNodePtr) ent;
			list = NULL;
			ent->owner = 1;
		    } else {
			/*
			 * Reparent every node to the current element and
			 * record the last one in ent->last; list is then
			 * reset to the head for the insertion code below.
			 */
			ent->owner = 0;
			while (list != NULL) {
			    list->parent = (xmlNodePtr) ctxt->node;
			    list->doc = ctxt->myDoc;
			    if (list->next == NULL)
				ent->last = list;
			    list = list->next;
			}
			list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
			  xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
		    }
		} else {
		    /*
		     * No substitution: the entity owns the nodes. The
		     * loop leaves list at NULL when it terminates.
		     */
		    ent->owner = 1;
		    while (list != NULL) {
			list->parent = (xmlNodePtr) ent;
			xmlSetTreeDoc(list, ent->doc);
			if (list->next == NULL)
			    ent->last = list;
			list = list->next;
		    }
		}
	    } else {
		/* wrong entity type or children already set: drop the list */
		xmlFreeNodeList(list);
		list = NULL;
	    }
	} else if ((ret != XML_ERR_OK) &&
		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
		     "Entity '%s' failed to parse\n", ent->name);
	    /* truncate the stored content of the entity that failed */
            if (ent->content != NULL)
                ent->content[0] = 0;
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	} else if (list != NULL) {
	    xmlFreeNodeList(list);
	    list = NULL;
	}
	/* make sure the entity is flagged as checked from now on */
	if (ent->checked == 0)
	    ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
	/* already checked: account for the count recorded back then */
	ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
	/*
	 * Probably running in SAX mode and the callbacks don't
	 * build the entity content. So unless we already went
	 * through parsing for first checking go through the entity
	 * content to generate callbacks associated to the entity
	 */
	if (was_checked != 0) {
	    void *user_data;
	    /*
	     * This is a bit hackish but this seems the best
	     * way to make sure both SAX and DOM entity support
	     * behaves okay.
	     */
	    if (ctxt->userData == ctxt)
		user_data = NULL;
	    else
		user_data = ctxt->userData;

	    /* same parse as above, but without collecting a node list */
	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
		ctxt->depth++;
		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
				   ent->content, user_data, NULL);
		ctxt->depth--;
	    } else if (ent->etype ==
		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
		ctxt->depth++;
		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
			   ctxt->sax, user_data, ctxt->depth,
			   ent->URI, ent->ExternalID, NULL);
		ctxt->depth--;
	    } else {
		ret = XML_ERR_ENTITY_PE_INTERNAL;
		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
			     "invalid entity type found\n", NULL);
	    }
	    if (ret == XML_ERR_ENTITY_LOOP) {
		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
		return;
	    }
	}
	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	    /*
	     * Entity reference callback comes second, it's somewhat
	     * superfluous but a compatibility to historical behaviour
	     */
	    ctxt->sax->reference(ctxt->userData, ent->name);
	}
	return;
    }

    /*
     * From here on the entity has a children list. When entities are
     * not substituted, only report the reference to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
	/*
	 * Create a node.
	 */
	ctxt->sax->reference(ctxt->userData, ent->name);
	return;
    }

    /*
     * Note: ent->children cannot be NULL at this point (that case
     * returned above), so only the replaceEntities test matters here.
     */
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
	/*
	 * There is a problem on the handling of _private for entities
	 * (bug 155816): Should we copy the content of the field from
	 * the entity (possibly overwriting some value set by the user
	 * when a copy is created), should we leave it alone, or should
	 * we try to take care of different situations?  The problem
	 * is exacerbated by the usage of this field by the xmlReader.
	 * To fix this bug, we look at _private on the created node
	 * and, if it's NULL, we copy in whatever was in the entity.
	 * If it's not NULL we leave it alone.  This is somewhat of a
	 * hack - maybe we should have further tests to determine
	 * what to do.
	 */
	if ((ctxt->node != NULL) && (ent->children != NULL)) {
	    /*
	     * Seems we are generating the DOM content, do
	     * a simple tree copy for all references except the first
	     * In the first occurrence list contains the replacement.
	     */
	    if (((list == NULL) && (ent->owner == 0)) ||
		(ctxt->parseMode == XML_PARSE_READER)) {
		/*
		 * Case 1: the entity's nodes stay where they are and
		 * copies of them are appended to the current node.
		 */
		xmlNodePtr nw = NULL, cur, firstChild = NULL;

		/*
		 * We are copying here, make sure there is no abuse
		 */
		ctxt->sizeentcopy += ent->length + 5;
		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
		    return;

		/*
		 * when operating on a reader, the entities definitions
		 * are always owning the entities subtree.
		if (ctxt->parseMode == XML_PARSE_READER)
		    ent->owner = 1;
		 */

		cur = ent->children;
		while (cur != NULL) {
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = nw;
			}
			/* nw now holds whatever xmlAddChild() returned */
			nw = xmlAddChild(ctxt->node, nw);
		    }
		    /* stop at ent->last instead of walking to the list end */
		    if (cur == ent->last) {
			/*
			 * needed to detect some strange empty
			 * node cases in the reader tests
			 */
			if ((ctxt->parseMode == XML_PARSE_READER) &&
			    (nw != NULL) &&
			    (nw->type == XML_ELEMENT_NODE) &&
			    (nw->children == NULL))
			    nw->extra = 1;

			break;
		    }
		    cur = cur->next;
		}
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else if ((list == NULL) || (ctxt->inputNr > 0)) {
		/*
		 * Case 2: the original nodes are moved under the current
		 * node and the entity receives the copies instead.
		 */
		xmlNodePtr nw = NULL, cur, next, last,
			   firstChild = NULL;

		/*
		 * We are copying here, make sure there is no abuse
		 */
		ctxt->sizeentcopy += ent->length + 5;
		if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
		    return;

		/*
		 * Copy the entity child list and make it the new
		 * entity child list. The goal is to make sure any
		 * ID or REF referenced will be the one from the
		 * document content and not the entity copy.
		 */
		cur = ent->children;
		ent->children = NULL;
		last = ent->last;
		ent->last = NULL;
		while (cur != NULL) {
		    /* detach cur from its siblings before copying it */
		    next = cur->next;
		    cur->next = NULL;
		    cur->parent = NULL;
		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
		    if (nw != NULL) {
			if (nw->_private == NULL)
			    nw->_private = cur->_private;
			if (firstChild == NULL){
			    firstChild = cur;
			}
			/* copy goes to the entity, original to the tree */
			xmlAddChild((xmlNodePtr) ent, nw);
			xmlAddChild(ctxt->node, cur);
		    }
		    if (cur == last)
			break;
		    cur = next;
		}
		if (ent->owner == 0)
		    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
		  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
	    } else {
		/*
		 * Case 3: the entity's own child list is linked directly
		 * under the current node, without any copy.
		 */
		const xmlChar *nbktext;

		/*
		 * the name change is to avoid coalescing of the
		 * node with a possible previous text one which
		 * would make ent->children a dangling pointer
		 */
		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
					-1);
		if (ent->children->type == XML_TEXT_NODE)
		    ent->children->name = nbktext;
		if ((ent->last != ent->children) &&
		    (ent->last->type == XML_TEXT_NODE))
		    ent->last->name = nbktext;
		xmlAddChildList(ctxt->node, ent->children);
	    }

	    /*
	     * This is to avoid a nasty side effect, see
	     * characters() in SAX.c
	     */
	    ctxt->nodemem = 0;
	    ctxt->nodelen = 0;
	    return;
	}
    }
}
7447 
7448 /**
7449  * xmlParseEntityRef:
7450  * @ctxt:  an XML parser context
7451  *
7452  * parse ENTITY references declarations
7453  *
7454  * [68] EntityRef ::= '&' Name ';'
7455  *
7456  * [ WFC: Entity Declared ]
7457  * In a document without any DTD, a document with only an internal DTD
7458  * subset which contains no parameter entity references, or a document
7459  * with "standalone='yes'", the Name given in the entity reference
7460  * must match that in an entity declaration, except that well-formed
7461  * documents need not declare any of the following entities: amp, lt,
7462  * gt, apos, quot.  The declaration of a parameter entity must precede
7463  * any reference to it.  Similarly, the declaration of a general entity
7464  * must precede any reference to it which appears in a default value in an
7465  * attribute-list declaration. Note that if entities are declared in the
7466  * external subset or in external parameter entities, a non-validating
7467  * processor is not obligated to read and process their declarations;
7468  * for such documents, the rule that an entity must be declared is a
7469  * well-formedness constraint only if standalone='yes'.
7470  *
7471  * [ WFC: Parsed Entity ]
7472  * An entity reference must not contain the name of an unparsed entity
7473  *
7474  * Returns the xmlEntityPtr if found, or NULL otherwise.
7475  */
7476 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7477 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7478     const xmlChar *name;
7479     xmlEntityPtr ent = NULL;
7480 
7481     GROW;
7482     if (ctxt->instate == XML_PARSER_EOF)
7483         return(NULL);
7484 
7485     if (RAW != '&')
7486         return(NULL);
7487     NEXT;
7488     name = xmlParseName(ctxt);
7489     if (name == NULL) {
7490 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7491 		       "xmlParseEntityRef: no name\n");
7492         return(NULL);
7493     }
7494     if (RAW != ';') {
7495 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7496 	return(NULL);
7497     }
7498     NEXT;
7499 
7500     /*
7501      * Predefined entities override any extra definition
7502      */
7503     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7504         ent = xmlGetPredefinedEntity(name);
7505         if (ent != NULL)
7506             return(ent);
7507     }
7508 
7509     /*
7510      * Increase the number of entity references parsed
7511      */
7512     ctxt->nbentities++;
7513 
7514     /*
7515      * Ask first SAX for entity resolution, otherwise try the
7516      * entities which may have stored in the parser context.
7517      */
7518     if (ctxt->sax != NULL) {
7519 	if (ctxt->sax->getEntity != NULL)
7520 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7521 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7522 	    (ctxt->options & XML_PARSE_OLDSAX))
7523 	    ent = xmlGetPredefinedEntity(name);
7524 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7525 	    (ctxt->userData==ctxt)) {
7526 	    ent = xmlSAX2GetEntity(ctxt, name);
7527 	}
7528     }
7529     if (ctxt->instate == XML_PARSER_EOF)
7530 	return(NULL);
7531     /*
7532      * [ WFC: Entity Declared ]
7533      * In a document without any DTD, a document with only an
7534      * internal DTD subset which contains no parameter entity
7535      * references, or a document with "standalone='yes'", the
7536      * Name given in the entity reference must match that in an
7537      * entity declaration, except that well-formed documents
7538      * need not declare any of the following entities: amp, lt,
7539      * gt, apos, quot.
7540      * The declaration of a parameter entity must precede any
7541      * reference to it.
7542      * Similarly, the declaration of a general entity must
7543      * precede any reference to it which appears in a default
7544      * value in an attribute-list declaration. Note that if
7545      * entities are declared in the external subset or in
7546      * external parameter entities, a non-validating processor
7547      * is not obligated to read and process their declarations;
7548      * for such documents, the rule that an entity must be
7549      * declared is a well-formedness constraint only if
7550      * standalone='yes'.
7551      */
7552     if (ent == NULL) {
7553 	if ((ctxt->standalone == 1) ||
7554 	    ((ctxt->hasExternalSubset == 0) &&
7555 	     (ctxt->hasPErefs == 0))) {
7556 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7557 		     "Entity '%s' not defined\n", name);
7558 	} else {
7559 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7560 		     "Entity '%s' not defined\n", name);
7561 	    if ((ctxt->inSubset == 0) &&
7562 		(ctxt->sax != NULL) &&
7563 		(ctxt->sax->reference != NULL)) {
7564 		ctxt->sax->reference(ctxt->userData, name);
7565 	    }
7566 	}
7567 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7568 	ctxt->valid = 0;
7569     }
7570 
7571     /*
7572      * [ WFC: Parsed Entity ]
7573      * An entity reference must not contain the name of an
7574      * unparsed entity
7575      */
7576     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7577 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7578 		 "Entity reference to unparsed entity %s\n", name);
7579     }
7580 
7581     /*
7582      * [ WFC: No External Entity References ]
7583      * Attribute values cannot contain direct or indirect
7584      * entity references to external entities.
7585      */
7586     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7587 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7588 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7589 	     "Attribute references external entity '%s'\n", name);
7590     }
7591     /*
7592      * [ WFC: No < in Attribute Values ]
7593      * The replacement text of any entity referred to directly or
7594      * indirectly in an attribute value (other than "&lt;") must
7595      * not contain a <.
7596      */
7597     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7598 	     (ent != NULL) &&
7599 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7600 	if (((ent->checked & 1) || (ent->checked == 0)) &&
7601 	     (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7602 	    xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7603 	"'<' in entity '%s' is not allowed in attributes values\n", name);
7604         }
7605     }
7606 
7607     /*
7608      * Internal check, no parameter entities here ...
7609      */
7610     else {
7611 	switch (ent->etype) {
7612 	    case XML_INTERNAL_PARAMETER_ENTITY:
7613 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7614 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7615 	     "Attempt to reference the parameter entity '%s'\n",
7616 			      name);
7617 	    break;
7618 	    default:
7619 	    break;
7620 	}
7621     }
7622 
7623     /*
7624      * [ WFC: No Recursion ]
7625      * A parsed entity must not contain a recursive reference
7626      * to itself, either directly or indirectly.
7627      * Done somewhere else
7628      */
7629     return(ent);
7630 }
7631 
7632 /**
7633  * xmlParseStringEntityRef:
7634  * @ctxt:  an XML parser context
7635  * @str:  a pointer to an index in the string
7636  *
7637  * parse ENTITY references declarations, but this version parses it from
7638  * a string value.
7639  *
7640  * [68] EntityRef ::= '&' Name ';'
7641  *
7642  * [ WFC: Entity Declared ]
7643  * In a document without any DTD, a document with only an internal DTD
7644  * subset which contains no parameter entity references, or a document
7645  * with "standalone='yes'", the Name given in the entity reference
7646  * must match that in an entity declaration, except that well-formed
7647  * documents need not declare any of the following entities: amp, lt,
7648  * gt, apos, quot.  The declaration of a parameter entity must precede
7649  * any reference to it.  Similarly, the declaration of a general entity
7650  * must precede any reference to it which appears in a default value in an
7651  * attribute-list declaration. Note that if entities are declared in the
7652  * external subset or in external parameter entities, a non-validating
7653  * processor is not obligated to read and process their declarations;
7654  * for such documents, the rule that an entity must be declared is a
7655  * well-formedness constraint only if standalone='yes'.
7656  *
7657  * [ WFC: Parsed Entity ]
7658  * An entity reference must not contain the name of an unparsed entity
7659  *
7660  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7661  * is updated to the current location in the string.
7662  */
7663 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7664 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7665     xmlChar *name;
7666     const xmlChar *ptr;
7667     xmlChar cur;
7668     xmlEntityPtr ent = NULL;
7669 
7670     if ((str == NULL) || (*str == NULL))
7671         return(NULL);
7672     ptr = *str;
7673     cur = *ptr;
7674     if (cur != '&')
7675 	return(NULL);
7676 
7677     ptr++;
7678     name = xmlParseStringName(ctxt, &ptr);
7679     if (name == NULL) {
7680 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 		       "xmlParseStringEntityRef: no name\n");
7682 	*str = ptr;
7683 	return(NULL);
7684     }
7685     if (*ptr != ';') {
7686 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7687         xmlFree(name);
7688 	*str = ptr;
7689 	return(NULL);
7690     }
7691     ptr++;
7692 
7693 
7694     /*
7695      * Predefined entities override any extra definition
7696      */
7697     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7698         ent = xmlGetPredefinedEntity(name);
7699         if (ent != NULL) {
7700             xmlFree(name);
7701             *str = ptr;
7702             return(ent);
7703         }
7704     }
7705 
7706     /*
7707      * Increate the number of entity references parsed
7708      */
7709     ctxt->nbentities++;
7710 
7711     /*
7712      * Ask first SAX for entity resolution, otherwise try the
7713      * entities which may have stored in the parser context.
7714      */
7715     if (ctxt->sax != NULL) {
7716 	if (ctxt->sax->getEntity != NULL)
7717 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7718 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7719 	    ent = xmlGetPredefinedEntity(name);
7720 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7721 	    ent = xmlSAX2GetEntity(ctxt, name);
7722 	}
7723     }
7724     if (ctxt->instate == XML_PARSER_EOF) {
7725 	xmlFree(name);
7726 	return(NULL);
7727     }
7728 
7729     /*
7730      * [ WFC: Entity Declared ]
7731      * In a document without any DTD, a document with only an
7732      * internal DTD subset which contains no parameter entity
7733      * references, or a document with "standalone='yes'", the
7734      * Name given in the entity reference must match that in an
7735      * entity declaration, except that well-formed documents
7736      * need not declare any of the following entities: amp, lt,
7737      * gt, apos, quot.
7738      * The declaration of a parameter entity must precede any
7739      * reference to it.
7740      * Similarly, the declaration of a general entity must
7741      * precede any reference to it which appears in a default
7742      * value in an attribute-list declaration. Note that if
7743      * entities are declared in the external subset or in
7744      * external parameter entities, a non-validating processor
7745      * is not obligated to read and process their declarations;
7746      * for such documents, the rule that an entity must be
7747      * declared is a well-formedness constraint only if
7748      * standalone='yes'.
7749      */
7750     if (ent == NULL) {
7751 	if ((ctxt->standalone == 1) ||
7752 	    ((ctxt->hasExternalSubset == 0) &&
7753 	     (ctxt->hasPErefs == 0))) {
7754 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7755 		     "Entity '%s' not defined\n", name);
7756 	} else {
7757 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7758 			  "Entity '%s' not defined\n",
7759 			  name);
7760 	}
7761 	xmlParserEntityCheck(ctxt, 0, ent, 0);
7762 	/* TODO ? check regressions ctxt->valid = 0; */
7763     }
7764 
7765     /*
7766      * [ WFC: Parsed Entity ]
7767      * An entity reference must not contain the name of an
7768      * unparsed entity
7769      */
7770     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7771 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7772 		 "Entity reference to unparsed entity %s\n", name);
7773     }
7774 
7775     /*
7776      * [ WFC: No External Entity References ]
7777      * Attribute values cannot contain direct or indirect
7778      * entity references to external entities.
7779      */
7780     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7781 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7782 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7783 	 "Attribute references external entity '%s'\n", name);
7784     }
7785     /*
7786      * [ WFC: No < in Attribute Values ]
7787      * The replacement text of any entity referred to directly or
7788      * indirectly in an attribute value (other than "&lt;") must
7789      * not contain a <.
7790      */
7791     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7792 	     (ent != NULL) && (ent->content != NULL) &&
7793 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7794 	     (xmlStrchr(ent->content, '<'))) {
7795 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7796      "'<' in entity '%s' is not allowed in attributes values\n",
7797 			  name);
7798     }
7799 
7800     /*
7801      * Internal check, no parameter entities here ...
7802      */
7803     else {
7804 	switch (ent->etype) {
7805 	    case XML_INTERNAL_PARAMETER_ENTITY:
7806 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7807 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7808 	     "Attempt to reference the parameter entity '%s'\n",
7809 				  name);
7810 	    break;
7811 	    default:
7812 	    break;
7813 	}
7814     }
7815 
7816     /*
7817      * [ WFC: No Recursion ]
7818      * A parsed entity must not contain a recursive reference
7819      * to itself, either directly or indirectly.
7820      * Done somewhere else
7821      */
7822 
7823     xmlFree(name);
7824     *str = ptr;
7825     return(ent);
7826 }
7827 
7828 /**
7829  * xmlParsePEReference:
7830  * @ctxt:  an XML parser context
7831  *
7832  * parse PEReference declarations
7833  * The entity content is handled directly by pushing it's content as
7834  * a new input stream.
7835  *
7836  * [69] PEReference ::= '%' Name ';'
7837  *
7838  * [ WFC: No Recursion ]
7839  * A parsed entity must not contain a recursive
7840  * reference to itself, either directly or indirectly.
7841  *
7842  * [ WFC: Entity Declared ]
7843  * In a document without any DTD, a document with only an internal DTD
7844  * subset which contains no parameter entity references, or a document
7845  * with "standalone='yes'", ...  ... The declaration of a parameter
7846  * entity must precede any reference to it...
7847  *
7848  * [ VC: Entity Declared ]
7849  * In a document with an external subset or external parameter entities
7850  * with "standalone='no'", ...  ... The declaration of a parameter entity
7851  * must precede any reference to it...
7852  *
7853  * [ WFC: In DTD ]
7854  * Parameter-entity references may only appear in the DTD.
7855  * NOTE: misleading but this is handled.
7856  */
7857 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7858 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7859 {
7860     const xmlChar *name;
7861     xmlEntityPtr entity = NULL;
7862     xmlParserInputPtr input;
7863 
7864     if (RAW != '%')
7865         return;
7866     NEXT;
7867     name = xmlParseName(ctxt);
7868     if (name == NULL) {
7869 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7870 	return;
7871     }
7872     if (xmlParserDebugEntities)
7873 	xmlGenericError(xmlGenericErrorContext,
7874 		"PEReference: %s\n", name);
7875     if (RAW != ';') {
7876 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7877         return;
7878     }
7879 
7880     NEXT;
7881 
7882     /*
7883      * Increate the number of entity references parsed
7884      */
7885     ctxt->nbentities++;
7886 
7887     /*
7888      * Request the entity from SAX
7889      */
7890     if ((ctxt->sax != NULL) &&
7891 	(ctxt->sax->getParameterEntity != NULL))
7892 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7893     if (ctxt->instate == XML_PARSER_EOF)
7894 	return;
7895     if (entity == NULL) {
7896 	/*
7897 	 * [ WFC: Entity Declared ]
7898 	 * In a document without any DTD, a document with only an
7899 	 * internal DTD subset which contains no parameter entity
7900 	 * references, or a document with "standalone='yes'", ...
7901 	 * ... The declaration of a parameter entity must precede
7902 	 * any reference to it...
7903 	 */
7904 	if ((ctxt->standalone == 1) ||
7905 	    ((ctxt->hasExternalSubset == 0) &&
7906 	     (ctxt->hasPErefs == 0))) {
7907 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7908 			      "PEReference: %%%s; not found\n",
7909 			      name);
7910 	} else {
7911 	    /*
7912 	     * [ VC: Entity Declared ]
7913 	     * In a document with an external subset or external
7914 	     * parameter entities with "standalone='no'", ...
7915 	     * ... The declaration of a parameter entity must
7916 	     * precede any reference to it...
7917 	     */
7918             if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7919                 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920                                  "PEReference: %%%s; not found\n",
7921                                  name, NULL);
7922             } else
7923                 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7924                               "PEReference: %%%s; not found\n",
7925                               name, NULL);
7926             ctxt->valid = 0;
7927 	}
7928 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
7929     } else {
7930 	/*
7931 	 * Internal checking in case the entity quest barfed
7932 	 */
7933 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7934 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7935 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7936 		  "Internal: %%%s; is not a parameter entity\n",
7937 			  name, NULL);
7938 	} else {
7939             xmlChar start[4];
7940             xmlCharEncoding enc;
7941 
7942 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7943 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7944 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7945 		((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7946 		((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7947 		(ctxt->replaceEntities == 0) &&
7948 		(ctxt->validate == 0))
7949 		return;
7950 
7951 	    input = xmlNewEntityInputStream(ctxt, entity);
7952 	    if (xmlPushInput(ctxt, input) < 0) {
7953                 xmlFreeInputStream(input);
7954 		return;
7955             }
7956 
7957 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7958                 /*
7959                  * Get the 4 first bytes and decode the charset
7960                  * if enc != XML_CHAR_ENCODING_NONE
7961                  * plug some encoding conversion routines.
7962                  * Note that, since we may have some non-UTF8
7963                  * encoding (like UTF16, bug 135229), the 'length'
7964                  * is not known, but we can calculate based upon
7965                  * the amount of data in the buffer.
7966                  */
7967                 GROW
7968                 if (ctxt->instate == XML_PARSER_EOF)
7969                     return;
7970                 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7971                     start[0] = RAW;
7972                     start[1] = NXT(1);
7973                     start[2] = NXT(2);
7974                     start[3] = NXT(3);
7975                     enc = xmlDetectCharEncoding(start, 4);
7976                     if (enc != XML_CHAR_ENCODING_NONE) {
7977                         xmlSwitchEncoding(ctxt, enc);
7978                     }
7979                 }
7980 
7981                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7982                     (IS_BLANK_CH(NXT(5)))) {
7983                     xmlParseTextDecl(ctxt);
7984                 }
7985             }
7986 	}
7987     }
7988     ctxt->hasPErefs = 1;
7989 }
7990 
7991 /**
7992  * xmlLoadEntityContent:
7993  * @ctxt:  an XML parser context
7994  * @entity: an unloaded system entity
7995  *
7996  * Load the original content of the given system entity from the
7997  * ExternalID/SystemID given. This is to be used for Included in Literal
7998  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7999  *
8000  * Returns 0 in case of success and -1 in case of failure
8001  */
8002 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8003 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8004     xmlParserInputPtr input;
8005     xmlBufferPtr buf;
8006     int l, c;
8007     int count = 0;
8008 
8009     if ((ctxt == NULL) || (entity == NULL) ||
8010         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8011 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8012 	(entity->content != NULL)) {
8013 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8014 	            "xmlLoadEntityContent parameter error");
8015         return(-1);
8016     }
8017 
8018     if (xmlParserDebugEntities)
8019 	xmlGenericError(xmlGenericErrorContext,
8020 		"Reading %s entity content input\n", entity->name);
8021 
8022     buf = xmlBufferCreate();
8023     if (buf == NULL) {
8024 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8025 	            "xmlLoadEntityContent parameter error");
8026         return(-1);
8027     }
8028 
8029     input = xmlNewEntityInputStream(ctxt, entity);
8030     if (input == NULL) {
8031 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8032 	            "xmlLoadEntityContent input error");
8033 	xmlBufferFree(buf);
8034         return(-1);
8035     }
8036 
8037     /*
8038      * Push the entity as the current input, read char by char
8039      * saving to the buffer until the end of the entity or an error
8040      */
8041     if (xmlPushInput(ctxt, input) < 0) {
8042         xmlBufferFree(buf);
8043 	return(-1);
8044     }
8045 
8046     GROW;
8047     c = CUR_CHAR(l);
8048     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8049            (IS_CHAR(c))) {
8050         xmlBufferAdd(buf, ctxt->input->cur, l);
8051 	if (count++ > XML_PARSER_CHUNK_SIZE) {
8052 	    count = 0;
8053 	    GROW;
8054             if (ctxt->instate == XML_PARSER_EOF) {
8055                 xmlBufferFree(buf);
8056                 return(-1);
8057             }
8058 	}
8059 	NEXTL(l);
8060 	c = CUR_CHAR(l);
8061 	if (c == 0) {
8062 	    count = 0;
8063 	    GROW;
8064             if (ctxt->instate == XML_PARSER_EOF) {
8065                 xmlBufferFree(buf);
8066                 return(-1);
8067             }
8068 	    c = CUR_CHAR(l);
8069 	}
8070     }
8071 
8072     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8073         xmlPopInput(ctxt);
8074     } else if (!IS_CHAR(c)) {
8075         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8076                           "xmlLoadEntityContent: invalid char value %d\n",
8077 	                  c);
8078 	xmlBufferFree(buf);
8079 	return(-1);
8080     }
8081     entity->content = buf->content;
8082     buf->content = NULL;
8083     xmlBufferFree(buf);
8084 
8085     return(0);
8086 }
8087 
8088 /**
8089  * xmlParseStringPEReference:
8090  * @ctxt:  an XML parser context
8091  * @str:  a pointer to an index in the string
8092  *
8093  * parse PEReference declarations
8094  *
8095  * [69] PEReference ::= '%' Name ';'
8096  *
8097  * [ WFC: No Recursion ]
8098  * A parsed entity must not contain a recursive
8099  * reference to itself, either directly or indirectly.
8100  *
8101  * [ WFC: Entity Declared ]
8102  * In a document without any DTD, a document with only an internal DTD
8103  * subset which contains no parameter entity references, or a document
8104  * with "standalone='yes'", ...  ... The declaration of a parameter
8105  * entity must precede any reference to it...
8106  *
8107  * [ VC: Entity Declared ]
8108  * In a document with an external subset or external parameter entities
8109  * with "standalone='no'", ...  ... The declaration of a parameter entity
8110  * must precede any reference to it...
8111  *
8112  * [ WFC: In DTD ]
8113  * Parameter-entity references may only appear in the DTD.
8114  * NOTE: misleading but this is handled.
8115  *
8116  * Returns the string of the entity content.
8117  *         str is updated to the current value of the index
8118  */
8119 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8120 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8121     const xmlChar *ptr;
8122     xmlChar cur;
8123     xmlChar *name;
8124     xmlEntityPtr entity = NULL;
8125 
8126     if ((str == NULL) || (*str == NULL)) return(NULL);
8127     ptr = *str;
8128     cur = *ptr;
8129     if (cur != '%')
8130         return(NULL);
8131     ptr++;
8132     name = xmlParseStringName(ctxt, &ptr);
8133     if (name == NULL) {
8134 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8135 		       "xmlParseStringPEReference: no name\n");
8136 	*str = ptr;
8137 	return(NULL);
8138     }
8139     cur = *ptr;
8140     if (cur != ';') {
8141 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8142 	xmlFree(name);
8143 	*str = ptr;
8144 	return(NULL);
8145     }
8146     ptr++;
8147 
8148     /*
8149      * Increate the number of entity references parsed
8150      */
8151     ctxt->nbentities++;
8152 
8153     /*
8154      * Request the entity from SAX
8155      */
8156     if ((ctxt->sax != NULL) &&
8157 	(ctxt->sax->getParameterEntity != NULL))
8158 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8159     if (ctxt->instate == XML_PARSER_EOF) {
8160 	xmlFree(name);
8161 	*str = ptr;
8162 	return(NULL);
8163     }
8164     if (entity == NULL) {
8165 	/*
8166 	 * [ WFC: Entity Declared ]
8167 	 * In a document without any DTD, a document with only an
8168 	 * internal DTD subset which contains no parameter entity
8169 	 * references, or a document with "standalone='yes'", ...
8170 	 * ... The declaration of a parameter entity must precede
8171 	 * any reference to it...
8172 	 */
8173 	if ((ctxt->standalone == 1) ||
8174 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8175 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8176 		 "PEReference: %%%s; not found\n", name);
8177 	} else {
8178 	    /*
8179 	     * [ VC: Entity Declared ]
8180 	     * In a document with an external subset or external
8181 	     * parameter entities with "standalone='no'", ...
8182 	     * ... The declaration of a parameter entity must
8183 	     * precede any reference to it...
8184 	     */
8185 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8186 			  "PEReference: %%%s; not found\n",
8187 			  name, NULL);
8188 	    ctxt->valid = 0;
8189 	}
8190 	xmlParserEntityCheck(ctxt, 0, NULL, 0);
8191     } else {
8192 	/*
8193 	 * Internal checking in case the entity quest barfed
8194 	 */
8195 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8196 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8197 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8198 			  "%%%s; is not a parameter entity\n",
8199 			  name, NULL);
8200 	}
8201     }
8202     ctxt->hasPErefs = 1;
8203     xmlFree(name);
8204     *str = ptr;
8205     return(entity);
8206 }
8207 
8208 /**
8209  * xmlParseDocTypeDecl:
8210  * @ctxt:  an XML parser context
8211  *
8212  * parse a DOCTYPE declaration
8213  *
8214  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8215  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8216  *
8217  * [ VC: Root Element Type ]
8218  * The Name in the document type declaration must match the element
8219  * type of the root element.
8220  */
8221 
8222 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8223 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8224     const xmlChar *name = NULL;
8225     xmlChar *ExternalID = NULL;
8226     xmlChar *URI = NULL;
8227 
8228     /*
8229      * We know that '<!DOCTYPE' has been detected.
8230      */
8231     SKIP(9);
8232 
8233     SKIP_BLANKS;
8234 
8235     /*
8236      * Parse the DOCTYPE name.
8237      */
8238     name = xmlParseName(ctxt);
8239     if (name == NULL) {
8240 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8241 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8242     }
8243     ctxt->intSubName = name;
8244 
8245     SKIP_BLANKS;
8246 
8247     /*
8248      * Check for SystemID and ExternalID
8249      */
8250     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8251 
8252     if ((URI != NULL) || (ExternalID != NULL)) {
8253         ctxt->hasExternalSubset = 1;
8254     }
8255     ctxt->extSubURI = URI;
8256     ctxt->extSubSystem = ExternalID;
8257 
8258     SKIP_BLANKS;
8259 
8260     /*
8261      * Create and update the internal subset.
8262      */
8263     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8264 	(!ctxt->disableSAX))
8265 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8266     if (ctxt->instate == XML_PARSER_EOF)
8267 	return;
8268 
8269     /*
8270      * Is there any internal subset declarations ?
8271      * they are handled separately in xmlParseInternalSubset()
8272      */
8273     if (RAW == '[')
8274 	return;
8275 
8276     /*
8277      * We should be at the end of the DOCTYPE declaration.
8278      */
8279     if (RAW != '>') {
8280 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8281     }
8282     NEXT;
8283 }
8284 
8285 /**
8286  * xmlParseInternalSubset:
8287  * @ctxt:  an XML parser context
8288  *
8289  * parse the internal subset declaration
8290  *
8291  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8292  */
8293 
8294 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8295 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8296     /*
8297      * Is there any DTD definition ?
8298      */
8299     if (RAW == '[') {
8300         int baseInputNr = ctxt->inputNr;
8301         ctxt->instate = XML_PARSER_DTD;
8302         NEXT;
8303 	/*
8304 	 * Parse the succession of Markup declarations and
8305 	 * PEReferences.
8306 	 * Subsequence (markupdecl | PEReference | S)*
8307 	 */
8308 	while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8309                (ctxt->instate != XML_PARSER_EOF)) {
8310 	    const xmlChar *check = CUR_PTR;
8311 	    unsigned int cons = ctxt->input->consumed;
8312 
8313 	    SKIP_BLANKS;
8314 	    xmlParseMarkupDecl(ctxt);
8315 	    xmlParsePEReference(ctxt);
8316 
8317 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8318 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8319 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8320                 if (ctxt->inputNr > baseInputNr)
8321                     xmlPopInput(ctxt);
8322                 else
8323 		    break;
8324 	    }
8325 	}
8326 	if (RAW == ']') {
8327 	    NEXT;
8328 	    SKIP_BLANKS;
8329 	}
8330     }
8331 
8332     /*
8333      * We should be at the end of the DOCTYPE declaration.
8334      */
8335     if (RAW != '>') {
8336 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8337 	return;
8338     }
8339     NEXT;
8340 }
8341 
8342 #ifdef LIBXML_SAX1_ENABLED
8343 /**
8344  * xmlParseAttribute:
8345  * @ctxt:  an XML parser context
8346  * @value:  a xmlChar ** used to store the value of the attribute
8347  *
8348  * parse an attribute
8349  *
8350  * [41] Attribute ::= Name Eq AttValue
8351  *
8352  * [ WFC: No External Entity References ]
8353  * Attribute values cannot contain direct or indirect entity references
8354  * to external entities.
8355  *
8356  * [ WFC: No < in Attribute Values ]
8357  * The replacement text of any entity referred to directly or indirectly in
8358  * an attribute value (other than "&lt;") must not contain a <.
8359  *
8360  * [ VC: Attribute Value Type ]
8361  * The attribute must have been declared; the value must be of the type
8362  * declared for it.
8363  *
8364  * [25] Eq ::= S? '=' S?
8365  *
8366  * With namespace:
8367  *
8368  * [NS 11] Attribute ::= QName Eq AttValue
8369  *
8370  * Also the case QName == xmlns:??? is handled independently as a namespace
8371  * definition.
8372  *
8373  * Returns the attribute name, and the value in *value.
8374  */
8375 
8376 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8377 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8378     const xmlChar *name;
8379     xmlChar *val;
8380 
8381     *value = NULL;
8382     GROW;
8383     name = xmlParseName(ctxt);
8384     if (name == NULL) {
8385 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8386 	               "error parsing attribute name\n");
8387         return(NULL);
8388     }
8389 
8390     /*
8391      * read the value
8392      */
8393     SKIP_BLANKS;
8394     if (RAW == '=') {
8395         NEXT;
8396 	SKIP_BLANKS;
8397 	val = xmlParseAttValue(ctxt);
8398 	ctxt->instate = XML_PARSER_CONTENT;
8399     } else {
8400 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8401 	       "Specification mandates value for attribute %s\n", name);
8402 	return(NULL);
8403     }
8404 
8405     /*
8406      * Check that xml:lang conforms to the specification
8407      * No more registered as an error, just generate a warning now
8408      * since this was deprecated in XML second edition
8409      */
8410     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8411 	if (!xmlCheckLanguageID(val)) {
8412 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8413 		          "Malformed value for xml:lang : %s\n",
8414 			  val, NULL);
8415 	}
8416     }
8417 
8418     /*
8419      * Check that xml:space conforms to the specification
8420      */
8421     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8422 	if (xmlStrEqual(val, BAD_CAST "default"))
8423 	    *(ctxt->space) = 0;
8424 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8425 	    *(ctxt->space) = 1;
8426 	else {
8427 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8428 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8429                                  val, NULL);
8430 	}
8431     }
8432 
8433     *value = val;
8434     return(name);
8435 }
8436 
8437 /**
8438  * xmlParseStartTag:
8439  * @ctxt:  an XML parser context
8440  *
8441  * parse a start of tag either for rule element or
8442  * EmptyElement. In both case we don't parse the tag closing chars.
8443  *
8444  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8445  *
8446  * [ WFC: Unique Att Spec ]
8447  * No attribute name may appear more than once in the same start-tag or
8448  * empty-element tag.
8449  *
8450  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8451  *
8452  * [ WFC: Unique Att Spec ]
8453  * No attribute name may appear more than once in the same start-tag or
8454  * empty-element tag.
8455  *
8456  * With namespace:
8457  *
8458  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8459  *
8460  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8461  *
8462  * Returns the element name parsed
8463  */
8464 
8465 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8466 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8467     const xmlChar *name;
8468     const xmlChar *attname;
8469     xmlChar *attvalue;
8470     const xmlChar **atts = ctxt->atts;
8471     int nbatts = 0;
8472     int maxatts = ctxt->maxatts;
8473     int i;
8474 
8475     if (RAW != '<') return(NULL);
8476     NEXT1;
8477 
8478     name = xmlParseName(ctxt);
8479     if (name == NULL) {
8480 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8481 	     "xmlParseStartTag: invalid element name\n");
8482         return(NULL);
8483     }
8484 
8485     /*
8486      * Now parse the attributes, it ends up with the ending
8487      *
8488      * (S Attribute)* S?
8489      */
8490     SKIP_BLANKS;
8491     GROW;
8492 
8493     while (((RAW != '>') &&
8494 	   ((RAW != '/') || (NXT(1) != '>')) &&
8495 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8496 	const xmlChar *q = CUR_PTR;
8497 	unsigned int cons = ctxt->input->consumed;
8498 
8499 	attname = xmlParseAttribute(ctxt, &attvalue);
8500         if ((attname != NULL) && (attvalue != NULL)) {
8501 	    /*
8502 	     * [ WFC: Unique Att Spec ]
8503 	     * No attribute name may appear more than once in the same
8504 	     * start-tag or empty-element tag.
8505 	     */
8506 	    for (i = 0; i < nbatts;i += 2) {
8507 	        if (xmlStrEqual(atts[i], attname)) {
8508 		    xmlErrAttributeDup(ctxt, NULL, attname);
8509 		    xmlFree(attvalue);
8510 		    goto failed;
8511 		}
8512 	    }
8513 	    /*
8514 	     * Add the pair to atts
8515 	     */
8516 	    if (atts == NULL) {
8517 	        maxatts = 22; /* allow for 10 attrs by default */
8518 	        atts = (const xmlChar **)
8519 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8520 		if (atts == NULL) {
8521 		    xmlErrMemory(ctxt, NULL);
8522 		    if (attvalue != NULL)
8523 			xmlFree(attvalue);
8524 		    goto failed;
8525 		}
8526 		ctxt->atts = atts;
8527 		ctxt->maxatts = maxatts;
8528 	    } else if (nbatts + 4 > maxatts) {
8529 	        const xmlChar **n;
8530 
8531 	        maxatts *= 2;
8532 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8533 					     maxatts * sizeof(const xmlChar *));
8534 		if (n == NULL) {
8535 		    xmlErrMemory(ctxt, NULL);
8536 		    if (attvalue != NULL)
8537 			xmlFree(attvalue);
8538 		    goto failed;
8539 		}
8540 		atts = n;
8541 		ctxt->atts = atts;
8542 		ctxt->maxatts = maxatts;
8543 	    }
8544 	    atts[nbatts++] = attname;
8545 	    atts[nbatts++] = attvalue;
8546 	    atts[nbatts] = NULL;
8547 	    atts[nbatts + 1] = NULL;
8548 	} else {
8549 	    if (attvalue != NULL)
8550 		xmlFree(attvalue);
8551 	}
8552 
8553 failed:
8554 
8555 	GROW
8556 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8557 	    break;
8558 	if (SKIP_BLANKS == 0) {
8559 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8560 			   "attributes construct error\n");
8561 	}
8562         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8563             (attname == NULL) && (attvalue == NULL)) {
8564 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8565 			   "xmlParseStartTag: problem parsing attributes\n");
8566 	    break;
8567 	}
8568 	SHRINK;
8569         GROW;
8570     }
8571 
8572     /*
8573      * SAX: Start of Element !
8574      */
8575     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8576 	(!ctxt->disableSAX)) {
8577 	if (nbatts > 0)
8578 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8579 	else
8580 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8581     }
8582 
8583     if (atts != NULL) {
8584         /* Free only the content strings */
8585         for (i = 1;i < nbatts;i+=2)
8586 	    if (atts[i] != NULL)
8587 	       xmlFree((xmlChar *) atts[i]);
8588     }
8589     return(name);
8590 }
8591 
8592 /**
8593  * xmlParseEndTag1:
8594  * @ctxt:  an XML parser context
8595  * @line:  line of the start tag
8596  * @nsNr:  number of namespaces on the start tag
8597  *
8598  * parse an end of tag
8599  *
8600  * [42] ETag ::= '</' Name S? '>'
8601  *
8602  * With namespace
8603  *
8604  * [NS 9] ETag ::= '</' QName S? '>'
8605  */
8606 
8607 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8608 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8609     const xmlChar *name;
8610 
8611     GROW;
8612     if ((RAW != '<') || (NXT(1) != '/')) {
8613 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8614 		       "xmlParseEndTag: '</' not found\n");
8615 	return;
8616     }
8617     SKIP(2);
8618 
8619     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8620 
8621     /*
8622      * We should definitely be at the ending "S? '>'" part
8623      */
8624     GROW;
8625     SKIP_BLANKS;
8626     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8627 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8628     } else
8629 	NEXT1;
8630 
8631     /*
8632      * [ WFC: Element Type Match ]
8633      * The Name in an element's end-tag must match the element type in the
8634      * start-tag.
8635      *
8636      */
8637     if (name != (xmlChar*)1) {
8638         if (name == NULL) name = BAD_CAST "unparseable";
8639         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8640 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8641 		                ctxt->name, line, name);
8642     }
8643 
8644     /*
8645      * SAX: End of Tag
8646      */
8647     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8648 	(!ctxt->disableSAX))
8649         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8650 
8651     namePop(ctxt);
8652     spacePop(ctxt);
8653     return;
8654 }
8655 
8656 /**
8657  * xmlParseEndTag:
8658  * @ctxt:  an XML parser context
8659  *
8660  * parse an end of tag
8661  *
8662  * [42] ETag ::= '</' Name S? '>'
8663  *
8664  * With namespace
8665  *
8666  * [NS 9] ETag ::= '</' QName S? '>'
8667  */
8668 
8669 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8670 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8671     xmlParseEndTag1(ctxt, 0);
8672 }
8673 #endif /* LIBXML_SAX1_ENABLED */
8674 
8675 /************************************************************************
8676  *									*
8677  *		      SAX 2 specific operations				*
8678  *									*
8679  ************************************************************************/
8680 
8681 /*
8682  * xmlGetNamespace:
8683  * @ctxt:  an XML parser context
8684  * @prefix:  the prefix to lookup
8685  *
8686  * Lookup the namespace name for the @prefix (which ca be NULL)
8687  * The prefix must come from the @ctxt->dict dictionary
8688  *
8689  * Returns the namespace name or NULL if not bound
8690  */
8691 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8692 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8693     int i;
8694 
8695     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8696     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8697         if (ctxt->nsTab[i] == prefix) {
8698 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8699 	        return(NULL);
8700 	    return(ctxt->nsTab[i + 1]);
8701 	}
8702     return(NULL);
8703 }
8704 
8705 /**
8706  * xmlParseQName:
8707  * @ctxt:  an XML parser context
8708  * @prefix:  pointer to store the prefix part
8709  *
8710  * parse an XML Namespace QName
8711  *
8712  * [6]  QName  ::= (Prefix ':')? LocalPart
8713  * [7]  Prefix  ::= NCName
8714  * [8]  LocalPart  ::= NCName
8715  *
8716  * Returns the Name parsed or NULL
8717  */
8718 
8719 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8720 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8721     const xmlChar *l, *p;
8722 
8723     GROW;
8724 
8725     l = xmlParseNCName(ctxt);
8726     if (l == NULL) {
8727         if (CUR == ':') {
8728 	    l = xmlParseName(ctxt);
8729 	    if (l != NULL) {
8730 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8731 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8732 		*prefix = NULL;
8733 		return(l);
8734 	    }
8735 	}
8736         return(NULL);
8737     }
8738     if (CUR == ':') {
8739         NEXT;
8740 	p = l;
8741 	l = xmlParseNCName(ctxt);
8742 	if (l == NULL) {
8743 	    xmlChar *tmp;
8744 
8745             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8746 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8747 	    l = xmlParseNmtoken(ctxt);
8748 	    if (l == NULL)
8749 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8750 	    else {
8751 		tmp = xmlBuildQName(l, p, NULL, 0);
8752 		xmlFree((char *)l);
8753 	    }
8754 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8755 	    if (tmp != NULL) xmlFree(tmp);
8756 	    *prefix = NULL;
8757 	    return(p);
8758 	}
8759 	if (CUR == ':') {
8760 	    xmlChar *tmp;
8761 
8762             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8763 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8764 	    NEXT;
8765 	    tmp = (xmlChar *) xmlParseName(ctxt);
8766 	    if (tmp != NULL) {
8767 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8768 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8769 		if (tmp != NULL) xmlFree(tmp);
8770 		*prefix = p;
8771 		return(l);
8772 	    }
8773 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8774 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8775 	    if (tmp != NULL) xmlFree(tmp);
8776 	    *prefix = p;
8777 	    return(l);
8778 	}
8779 	*prefix = p;
8780     } else
8781         *prefix = NULL;
8782     return(l);
8783 }
8784 
8785 /**
8786  * xmlParseQNameAndCompare:
8787  * @ctxt:  an XML parser context
8788  * @name:  the localname
8789  * @prefix:  the prefix, if any.
8790  *
8791  * parse an XML name and compares for match
8792  * (specialized for endtag parsing)
8793  *
8794  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8795  * and the name for mismatch
8796  */
8797 
8798 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8799 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8800                         xmlChar const *prefix) {
8801     const xmlChar *cmp;
8802     const xmlChar *in;
8803     const xmlChar *ret;
8804     const xmlChar *prefix2;
8805 
8806     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8807 
8808     GROW;
8809     in = ctxt->input->cur;
8810 
8811     cmp = prefix;
8812     while (*in != 0 && *in == *cmp) {
8813 	++in;
8814 	++cmp;
8815     }
8816     if ((*cmp == 0) && (*in == ':')) {
8817         in++;
8818 	cmp = name;
8819 	while (*in != 0 && *in == *cmp) {
8820 	    ++in;
8821 	    ++cmp;
8822 	}
8823 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8824 	    /* success */
8825 	    ctxt->input->cur = in;
8826 	    return((const xmlChar*) 1);
8827 	}
8828     }
8829     /*
8830      * all strings coms from the dictionary, equality can be done directly
8831      */
8832     ret = xmlParseQName (ctxt, &prefix2);
8833     if ((ret == name) && (prefix == prefix2))
8834 	return((const xmlChar*) 1);
8835     return ret;
8836 }
8837 
8838 /**
8839  * xmlParseAttValueInternal:
8840  * @ctxt:  an XML parser context
8841  * @len:  attribute len result
8842  * @alloc:  whether the attribute was reallocated as a new string
8843  * @normalize:  if 1 then further non-CDATA normalization must be done
8844  *
8845  * parse a value for an attribute.
8846  * NOTE: if no normalization is needed, the routine will return pointers
8847  *       directly from the data buffer.
8848  *
8849  * 3.3.3 Attribute-Value Normalization:
8850  * Before the value of an attribute is passed to the application or
8851  * checked for validity, the XML processor must normalize it as follows:
8852  * - a character reference is processed by appending the referenced
8853  *   character to the attribute value
8854  * - an entity reference is processed by recursively processing the
8855  *   replacement text of the entity
8856  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8857  *   appending #x20 to the normalized value, except that only a single
8858  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8859  *   parsed entity or the literal entity value of an internal parsed entity
8860  * - other characters are processed by appending them to the normalized value
8861  * If the declared value is not CDATA, then the XML processor must further
8862  * process the normalized attribute value by discarding any leading and
8863  * trailing space (#x20) characters, and by replacing sequences of space
8864  * (#x20) characters by a single space (#x20) character.
8865  * All attributes for which no declaration has been read should be treated
8866  * by a non-validating parser as if declared CDATA.
8867  *
8868  * Returns the AttValue parsed or NULL. The value has to be freed by the
8869  *     caller if it was copied, this can be detected by val[*len] == 0.
8870  */
8871 
8872 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8873 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8874                          int normalize)
8875 {
8876     xmlChar limit = 0;
8877     const xmlChar *in = NULL, *start, *end, *last;
8878     xmlChar *ret = NULL;
8879     int line, col;
8880 
8881     GROW;
8882     in = (xmlChar *) CUR_PTR;
8883     line = ctxt->input->line;
8884     col = ctxt->input->col;
8885     if (*in != '"' && *in != '\'') {
8886         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8887         return (NULL);
8888     }
8889     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8890 
8891     /*
8892      * try to handle in this routine the most common case where no
8893      * allocation of a new string is required and where content is
8894      * pure ASCII.
8895      */
8896     limit = *in++;
8897     col++;
8898     end = ctxt->input->end;
8899     start = in;
8900     if (in >= end) {
8901         const xmlChar *oldbase = ctxt->input->base;
8902 	GROW;
8903 	if (oldbase != ctxt->input->base) {
8904 	    long delta = ctxt->input->base - oldbase;
8905 	    start = start + delta;
8906 	    in = in + delta;
8907 	}
8908 	end = ctxt->input->end;
8909     }
8910     if (normalize) {
8911         /*
8912 	 * Skip any leading spaces
8913 	 */
8914 	while ((in < end) && (*in != limit) &&
8915 	       ((*in == 0x20) || (*in == 0x9) ||
8916 	        (*in == 0xA) || (*in == 0xD))) {
8917 	    if (*in == 0xA) {
8918 	        line++; col = 1;
8919 	    } else {
8920 	        col++;
8921 	    }
8922 	    in++;
8923 	    start = in;
8924 	    if (in >= end) {
8925 		const xmlChar *oldbase = ctxt->input->base;
8926 		GROW;
8927                 if (ctxt->instate == XML_PARSER_EOF)
8928                     return(NULL);
8929 		if (oldbase != ctxt->input->base) {
8930 		    long delta = ctxt->input->base - oldbase;
8931 		    start = start + delta;
8932 		    in = in + delta;
8933 		}
8934 		end = ctxt->input->end;
8935                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8936                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8937                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8938                                    "AttValue length too long\n");
8939                     return(NULL);
8940                 }
8941 	    }
8942 	}
8943 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8944 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8945 	    col++;
8946 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8947 	    if (in >= end) {
8948 		const xmlChar *oldbase = ctxt->input->base;
8949 		GROW;
8950                 if (ctxt->instate == XML_PARSER_EOF)
8951                     return(NULL);
8952 		if (oldbase != ctxt->input->base) {
8953 		    long delta = ctxt->input->base - oldbase;
8954 		    start = start + delta;
8955 		    in = in + delta;
8956 		}
8957 		end = ctxt->input->end;
8958                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8959                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8960                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8961                                    "AttValue length too long\n");
8962                     return(NULL);
8963                 }
8964 	    }
8965 	}
8966 	last = in;
8967 	/*
8968 	 * skip the trailing blanks
8969 	 */
8970 	while ((last[-1] == 0x20) && (last > start)) last--;
8971 	while ((in < end) && (*in != limit) &&
8972 	       ((*in == 0x20) || (*in == 0x9) ||
8973 	        (*in == 0xA) || (*in == 0xD))) {
8974 	    if (*in == 0xA) {
8975 	        line++, col = 1;
8976 	    } else {
8977 	        col++;
8978 	    }
8979 	    in++;
8980 	    if (in >= end) {
8981 		const xmlChar *oldbase = ctxt->input->base;
8982 		GROW;
8983                 if (ctxt->instate == XML_PARSER_EOF)
8984                     return(NULL);
8985 		if (oldbase != ctxt->input->base) {
8986 		    long delta = ctxt->input->base - oldbase;
8987 		    start = start + delta;
8988 		    in = in + delta;
8989 		    last = last + delta;
8990 		}
8991 		end = ctxt->input->end;
8992                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8993                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8994                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8995                                    "AttValue length too long\n");
8996                     return(NULL);
8997                 }
8998 	    }
8999 	}
9000         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9001             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9002             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9003                            "AttValue length too long\n");
9004             return(NULL);
9005         }
9006 	if (*in != limit) goto need_complex;
9007     } else {
9008 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9009 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9010 	    in++;
9011 	    col++;
9012 	    if (in >= end) {
9013 		const xmlChar *oldbase = ctxt->input->base;
9014 		GROW;
9015                 if (ctxt->instate == XML_PARSER_EOF)
9016                     return(NULL);
9017 		if (oldbase != ctxt->input->base) {
9018 		    long delta = ctxt->input->base - oldbase;
9019 		    start = start + delta;
9020 		    in = in + delta;
9021 		}
9022 		end = ctxt->input->end;
9023                 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9024                     ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9025                     xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9026                                    "AttValue length too long\n");
9027                     return(NULL);
9028                 }
9029 	    }
9030 	}
9031 	last = in;
9032         if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9033             ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9034             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9035                            "AttValue length too long\n");
9036             return(NULL);
9037         }
9038 	if (*in != limit) goto need_complex;
9039     }
9040     in++;
9041     col++;
9042     if (len != NULL) {
9043         *len = last - start;
9044         ret = (xmlChar *) start;
9045     } else {
9046         if (alloc) *alloc = 1;
9047         ret = xmlStrndup(start, last - start);
9048     }
9049     CUR_PTR = in;
9050     ctxt->input->line = line;
9051     ctxt->input->col = col;
9052     if (alloc) *alloc = 0;
9053     return ret;
9054 need_complex:
9055     if (alloc) *alloc = 1;
9056     return xmlParseAttValueComplex(ctxt, len, normalize);
9057 }
9058 
9059 /**
9060  * xmlParseAttribute2:
9061  * @ctxt:  an XML parser context
9062  * @pref:  the element prefix
9063  * @elem:  the element name
9064  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9065  * @value:  a xmlChar ** used to store the value of the attribute
9066  * @len:  an int * to save the length of the attribute
9067  * @alloc:  an int * to indicate if the attribute was allocated
9068  *
9069  * parse an attribute in the new SAX2 framework.
9070  *
9071  * Returns the attribute name, and the value in *value, .
9072  */
9073 
9074 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9075 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9076                    const xmlChar * pref, const xmlChar * elem,
9077                    const xmlChar ** prefix, xmlChar ** value,
9078                    int *len, int *alloc)
9079 {
9080     const xmlChar *name;
9081     xmlChar *val, *internal_val = NULL;
9082     int normalize = 0;
9083 
9084     *value = NULL;
9085     GROW;
9086     name = xmlParseQName(ctxt, prefix);
9087     if (name == NULL) {
9088         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9089                        "error parsing attribute name\n");
9090         return (NULL);
9091     }
9092 
9093     /*
9094      * get the type if needed
9095      */
9096     if (ctxt->attsSpecial != NULL) {
9097         int type;
9098 
9099         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9100                                                  pref, elem, *prefix, name);
9101         if (type != 0)
9102             normalize = 1;
9103     }
9104 
9105     /*
9106      * read the value
9107      */
9108     SKIP_BLANKS;
9109     if (RAW == '=') {
9110         NEXT;
9111         SKIP_BLANKS;
9112         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9113 	if (normalize) {
9114 	    /*
9115 	     * Sometimes a second normalisation pass for spaces is needed
9116 	     * but that only happens if charrefs or entities refernces
9117 	     * have been used in the attribute value, i.e. the attribute
9118 	     * value have been extracted in an allocated string already.
9119 	     */
9120 	    if (*alloc) {
9121 	        const xmlChar *val2;
9122 
9123 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9124 		if ((val2 != NULL) && (val2 != val)) {
9125 		    xmlFree(val);
9126 		    val = (xmlChar *) val2;
9127 		}
9128 	    }
9129 	}
9130         ctxt->instate = XML_PARSER_CONTENT;
9131     } else {
9132         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9133                           "Specification mandates value for attribute %s\n",
9134                           name);
9135         return (NULL);
9136     }
9137 
9138     if (*prefix == ctxt->str_xml) {
9139         /*
9140          * Check that xml:lang conforms to the specification
9141          * No more registered as an error, just generate a warning now
9142          * since this was deprecated in XML second edition
9143          */
9144         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9145             internal_val = xmlStrndup(val, *len);
9146             if (!xmlCheckLanguageID(internal_val)) {
9147                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9148                               "Malformed value for xml:lang : %s\n",
9149                               internal_val, NULL);
9150             }
9151         }
9152 
9153         /*
9154          * Check that xml:space conforms to the specification
9155          */
9156         if (xmlStrEqual(name, BAD_CAST "space")) {
9157             internal_val = xmlStrndup(val, *len);
9158             if (xmlStrEqual(internal_val, BAD_CAST "default"))
9159                 *(ctxt->space) = 0;
9160             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9161                 *(ctxt->space) = 1;
9162             else {
9163                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9164                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9165                               internal_val, NULL);
9166             }
9167         }
9168         if (internal_val) {
9169             xmlFree(internal_val);
9170         }
9171     }
9172 
9173     *value = val;
9174     return (name);
9175 }
9176 /**
9177  * xmlParseStartTag2:
9178  * @ctxt:  an XML parser context
9179  *
9180  * parse a start of tag either for rule element or
9181  * EmptyElement. In both case we don't parse the tag closing chars.
9182  * This routine is called when running SAX2 parsing
9183  *
9184  * [40] STag ::= '<' Name (S Attribute)* S? '>'
9185  *
9186  * [ WFC: Unique Att Spec ]
9187  * No attribute name may appear more than once in the same start-tag or
9188  * empty-element tag.
9189  *
9190  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9191  *
9192  * [ WFC: Unique Att Spec ]
9193  * No attribute name may appear more than once in the same start-tag or
9194  * empty-element tag.
9195  *
9196  * With namespace:
9197  *
9198  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9199  *
9200  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9201  *
9202  * Returns the element name parsed
9203  */
9204 
9205 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9206 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9207                   const xmlChar **URI, int *tlen) {
9208     const xmlChar *localname;
9209     const xmlChar *prefix;
9210     const xmlChar *attname;
9211     const xmlChar *aprefix;
9212     const xmlChar *nsname;
9213     xmlChar *attvalue;
9214     const xmlChar **atts = ctxt->atts;
9215     int maxatts = ctxt->maxatts;
9216     int nratts, nbatts, nbdef, inputid;
9217     int i, j, nbNs, attval;
9218     unsigned long cur;
9219     int nsNr = ctxt->nsNr;
9220 
9221     if (RAW != '<') return(NULL);
9222     NEXT1;
9223 
9224     /*
9225      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9226      *       point since the attribute values may be stored as pointers to
9227      *       the buffer and calling SHRINK would destroy them !
9228      *       The Shrinking is only possible once the full set of attribute
9229      *       callbacks have been done.
9230      */
9231     SHRINK;
9232     cur = ctxt->input->cur - ctxt->input->base;
9233     inputid = ctxt->input->id;
9234     nbatts = 0;
9235     nratts = 0;
9236     nbdef = 0;
9237     nbNs = 0;
9238     attval = 0;
9239     /* Forget any namespaces added during an earlier parse of this element. */
9240     ctxt->nsNr = nsNr;
9241 
9242     localname = xmlParseQName(ctxt, &prefix);
9243     if (localname == NULL) {
9244 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9245 		       "StartTag: invalid element name\n");
9246         return(NULL);
9247     }
9248     *tlen = ctxt->input->cur - ctxt->input->base - cur;
9249 
9250     /*
9251      * Now parse the attributes, it ends up with the ending
9252      *
9253      * (S Attribute)* S?
9254      */
9255     SKIP_BLANKS;
9256     GROW;
9257 
9258     while (((RAW != '>') &&
9259 	   ((RAW != '/') || (NXT(1) != '>')) &&
9260 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9261 	const xmlChar *q = CUR_PTR;
9262 	unsigned int cons = ctxt->input->consumed;
9263 	int len = -1, alloc = 0;
9264 
9265 	attname = xmlParseAttribute2(ctxt, prefix, localname,
9266 	                             &aprefix, &attvalue, &len, &alloc);
9267         if ((attname == NULL) || (attvalue == NULL))
9268             goto next_attr;
9269 	if (len < 0) len = xmlStrlen(attvalue);
9270 
9271         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9272             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9273             xmlURIPtr uri;
9274 
9275             if (URL == NULL) {
9276                 xmlErrMemory(ctxt, "dictionary allocation failure");
9277                 if ((attvalue != NULL) && (alloc != 0))
9278                     xmlFree(attvalue);
9279                 return(NULL);
9280             }
9281             if (*URL != 0) {
9282                 uri = xmlParseURI((const char *) URL);
9283                 if (uri == NULL) {
9284                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9285                              "xmlns: '%s' is not a valid URI\n",
9286                                        URL, NULL, NULL);
9287                 } else {
9288                     if (uri->scheme == NULL) {
9289                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9290                                   "xmlns: URI %s is not absolute\n",
9291                                   URL, NULL, NULL);
9292                     }
9293                     xmlFreeURI(uri);
9294                 }
9295                 if (URL == ctxt->str_xml_ns) {
9296                     if (attname != ctxt->str_xml) {
9297                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9298                      "xml namespace URI cannot be the default namespace\n",
9299                                  NULL, NULL, NULL);
9300                     }
9301                     goto next_attr;
9302                 }
9303                 if ((len == 29) &&
9304                     (xmlStrEqual(URL,
9305                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9306                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9307                          "reuse of the xmlns namespace name is forbidden\n",
9308                              NULL, NULL, NULL);
9309                     goto next_attr;
9310                 }
9311             }
9312             /*
9313              * check that it's not a defined namespace
9314              */
9315             for (j = 1;j <= nbNs;j++)
9316                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9317                     break;
9318             if (j <= nbNs)
9319                 xmlErrAttributeDup(ctxt, NULL, attname);
9320             else
9321                 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9322 
9323         } else if (aprefix == ctxt->str_xmlns) {
9324             const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9325             xmlURIPtr uri;
9326 
9327             if (attname == ctxt->str_xml) {
9328                 if (URL != ctxt->str_xml_ns) {
9329                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330                              "xml namespace prefix mapped to wrong URI\n",
9331                              NULL, NULL, NULL);
9332                 }
9333                 /*
9334                  * Do not keep a namespace definition node
9335                  */
9336                 goto next_attr;
9337             }
9338             if (URL == ctxt->str_xml_ns) {
9339                 if (attname != ctxt->str_xml) {
9340                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9341                              "xml namespace URI mapped to wrong prefix\n",
9342                              NULL, NULL, NULL);
9343                 }
9344                 goto next_attr;
9345             }
9346             if (attname == ctxt->str_xmlns) {
9347                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348                          "redefinition of the xmlns prefix is forbidden\n",
9349                          NULL, NULL, NULL);
9350                 goto next_attr;
9351             }
9352             if ((len == 29) &&
9353                 (xmlStrEqual(URL,
9354                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9355                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356                          "reuse of the xmlns namespace name is forbidden\n",
9357                          NULL, NULL, NULL);
9358                 goto next_attr;
9359             }
9360             if ((URL == NULL) || (URL[0] == 0)) {
9361                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9362                          "xmlns:%s: Empty XML namespace is not allowed\n",
9363                               attname, NULL, NULL);
9364                 goto next_attr;
9365             } else {
9366                 uri = xmlParseURI((const char *) URL);
9367                 if (uri == NULL) {
9368                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9369                          "xmlns:%s: '%s' is not a valid URI\n",
9370                                        attname, URL, NULL);
9371                 } else {
9372                     if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9373                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9374                                   "xmlns:%s: URI %s is not absolute\n",
9375                                   attname, URL, NULL);
9376                     }
9377                     xmlFreeURI(uri);
9378                 }
9379             }
9380 
9381             /*
9382              * check that it's not a defined namespace
9383              */
9384             for (j = 1;j <= nbNs;j++)
9385                 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9386                     break;
9387             if (j <= nbNs)
9388                 xmlErrAttributeDup(ctxt, aprefix, attname);
9389             else
9390                 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9391 
9392         } else {
9393             /*
9394              * Add the pair to atts
9395              */
9396             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9397                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9398                     goto next_attr;
9399                 }
9400                 maxatts = ctxt->maxatts;
9401                 atts = ctxt->atts;
9402             }
9403             ctxt->attallocs[nratts++] = alloc;
9404             atts[nbatts++] = attname;
9405             atts[nbatts++] = aprefix;
9406             /*
9407              * The namespace URI field is used temporarily to point at the
9408              * base of the current input buffer for non-alloced attributes.
9409              * When the input buffer is reallocated, all the pointers become
9410              * invalid, but they can be reconstructed later.
9411              */
9412             if (alloc)
9413                 atts[nbatts++] = NULL;
9414             else
9415                 atts[nbatts++] = ctxt->input->base;
9416             atts[nbatts++] = attvalue;
9417             attvalue += len;
9418             atts[nbatts++] = attvalue;
9419             /*
9420              * tag if some deallocation is needed
9421              */
9422             if (alloc != 0) attval = 1;
9423             attvalue = NULL; /* moved into atts */
9424         }
9425 
9426 next_attr:
9427         if ((attvalue != NULL) && (alloc != 0)) {
9428             xmlFree(attvalue);
9429             attvalue = NULL;
9430         }
9431 
9432 	GROW
9433         if (ctxt->instate == XML_PARSER_EOF)
9434             break;
9435 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9436 	    break;
9437 	if (SKIP_BLANKS == 0) {
9438 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9439 			   "attributes construct error\n");
9440 	    break;
9441 	}
9442         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9443             (attname == NULL) && (attvalue == NULL)) {
9444 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9445 	         "xmlParseStartTag: problem parsing attributes\n");
9446 	    break;
9447 	}
9448         GROW;
9449     }
9450 
9451     if (ctxt->input->id != inputid) {
9452         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9453                     "Unexpected change of input\n");
9454         localname = NULL;
9455         goto done;
9456     }
9457 
9458     /* Reconstruct attribute value pointers. */
9459     for (i = 0, j = 0; j < nratts; i += 5, j++) {
9460         if (atts[i+2] != NULL) {
9461             /*
9462              * Arithmetic on dangling pointers is technically undefined
9463              * behavior, but well...
9464              */
9465             ptrdiff_t offset = ctxt->input->base - atts[i+2];
9466             atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9467             atts[i+3] += offset;  /* value */
9468             atts[i+4] += offset;  /* valuend */
9469         }
9470     }
9471 
9472     /*
9473      * The attributes defaulting
9474      */
9475     if (ctxt->attsDefault != NULL) {
9476         xmlDefAttrsPtr defaults;
9477 
9478 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9479 	if (defaults != NULL) {
9480 	    for (i = 0;i < defaults->nbAttrs;i++) {
9481 	        attname = defaults->values[5 * i];
9482 		aprefix = defaults->values[5 * i + 1];
9483 
9484                 /*
9485 		 * special work for namespaces defaulted defs
9486 		 */
9487 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9488 		    /*
9489 		     * check that it's not a defined namespace
9490 		     */
9491 		    for (j = 1;j <= nbNs;j++)
9492 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9493 			    break;
9494 	            if (j <= nbNs) continue;
9495 
9496 		    nsname = xmlGetNamespace(ctxt, NULL);
9497 		    if (nsname != defaults->values[5 * i + 2]) {
9498 			if (nsPush(ctxt, NULL,
9499 			           defaults->values[5 * i + 2]) > 0)
9500 			    nbNs++;
9501 		    }
9502 		} else if (aprefix == ctxt->str_xmlns) {
9503 		    /*
9504 		     * check that it's not a defined namespace
9505 		     */
9506 		    for (j = 1;j <= nbNs;j++)
9507 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9508 			    break;
9509 	            if (j <= nbNs) continue;
9510 
9511 		    nsname = xmlGetNamespace(ctxt, attname);
9512 		    if (nsname != defaults->values[2]) {
9513 			if (nsPush(ctxt, attname,
9514 			           defaults->values[5 * i + 2]) > 0)
9515 			    nbNs++;
9516 		    }
9517 		} else {
9518 		    /*
9519 		     * check that it's not a defined attribute
9520 		     */
9521 		    for (j = 0;j < nbatts;j+=5) {
9522 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9523 			    break;
9524 		    }
9525 		    if (j < nbatts) continue;
9526 
9527 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9528 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9529 			    return(NULL);
9530 			}
9531 			maxatts = ctxt->maxatts;
9532 			atts = ctxt->atts;
9533 		    }
9534 		    atts[nbatts++] = attname;
9535 		    atts[nbatts++] = aprefix;
9536 		    if (aprefix == NULL)
9537 			atts[nbatts++] = NULL;
9538 		    else
9539 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9540 		    atts[nbatts++] = defaults->values[5 * i + 2];
9541 		    atts[nbatts++] = defaults->values[5 * i + 3];
9542 		    if ((ctxt->standalone == 1) &&
9543 		        (defaults->values[5 * i + 4] != NULL)) {
9544 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9545 	  "standalone: attribute %s on %s defaulted from external subset\n",
9546 	                                 attname, localname);
9547 		    }
9548 		    nbdef++;
9549 		}
9550 	    }
9551 	}
9552     }
9553 
9554     /*
9555      * The attributes checkings
9556      */
9557     for (i = 0; i < nbatts;i += 5) {
9558         /*
9559 	* The default namespace does not apply to attribute names.
9560 	*/
9561 	if (atts[i + 1] != NULL) {
9562 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9563 	    if (nsname == NULL) {
9564 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9565 		    "Namespace prefix %s for %s on %s is not defined\n",
9566 		    atts[i + 1], atts[i], localname);
9567 	    }
9568 	    atts[i + 2] = nsname;
9569 	} else
9570 	    nsname = NULL;
9571 	/*
9572 	 * [ WFC: Unique Att Spec ]
9573 	 * No attribute name may appear more than once in the same
9574 	 * start-tag or empty-element tag.
9575 	 * As extended by the Namespace in XML REC.
9576 	 */
9577         for (j = 0; j < i;j += 5) {
9578 	    if (atts[i] == atts[j]) {
9579 	        if (atts[i+1] == atts[j+1]) {
9580 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9581 		    break;
9582 		}
9583 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9584 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9585 			     "Namespaced Attribute %s in '%s' redefined\n",
9586 			     atts[i], nsname, NULL);
9587 		    break;
9588 		}
9589 	    }
9590 	}
9591     }
9592 
9593     nsname = xmlGetNamespace(ctxt, prefix);
9594     if ((prefix != NULL) && (nsname == NULL)) {
9595 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9596 	         "Namespace prefix %s on %s is not defined\n",
9597 		 prefix, localname, NULL);
9598     }
9599     *pref = prefix;
9600     *URI = nsname;
9601 
9602     /*
9603      * SAX: Start of Element !
9604      */
9605     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9606 	(!ctxt->disableSAX)) {
9607 	if (nbNs > 0)
9608 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9610 			  nbatts / 5, nbdef, atts);
9611 	else
9612 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9613 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9614     }
9615 
9616 done:
9617     /*
9618      * Free up attribute allocated strings if needed
9619      */
9620     if (attval != 0) {
9621 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9622 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9623 	        xmlFree((xmlChar *) atts[i]);
9624     }
9625 
9626     return(localname);
9627 }
9628 
9629 /**
9630  * xmlParseEndTag2:
9631  * @ctxt:  an XML parser context
9632  * @line:  line of the start tag
9633  * @nsNr:  number of namespaces on the start tag
9634  *
9635  * parse an end of tag
9636  *
9637  * [42] ETag ::= '</' Name S? '>'
9638  *
9639  * With namespace
9640  *
9641  * [NS 9] ETag ::= '</' QName S? '>'
9642  */
9643 
9644 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9645 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9646                 const xmlChar *URI, int line, int nsNr, int tlen) {
9647     const xmlChar *name;
9648     size_t curLength;
9649 
9650     GROW;
9651     if ((RAW != '<') || (NXT(1) != '/')) {
9652 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9653 	return;
9654     }
9655     SKIP(2);
9656 
9657     curLength = ctxt->input->end - ctxt->input->cur;
9658     if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9659         (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9660         if ((curLength >= (size_t)(tlen + 1)) &&
9661 	    (ctxt->input->cur[tlen] == '>')) {
9662 	    ctxt->input->cur += tlen + 1;
9663 	    ctxt->input->col += tlen + 1;
9664 	    goto done;
9665 	}
9666 	ctxt->input->cur += tlen;
9667 	ctxt->input->col += tlen;
9668 	name = (xmlChar*)1;
9669     } else {
9670 	if (prefix == NULL)
9671 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9672 	else
9673 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9674     }
9675 
9676     /*
9677      * We should definitely be at the ending "S? '>'" part
9678      */
9679     GROW;
9680     if (ctxt->instate == XML_PARSER_EOF)
9681         return;
9682     SKIP_BLANKS;
9683     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9684 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9685     } else
9686 	NEXT1;
9687 
9688     /*
9689      * [ WFC: Element Type Match ]
9690      * The Name in an element's end-tag must match the element type in the
9691      * start-tag.
9692      *
9693      */
9694     if (name != (xmlChar*)1) {
9695         if (name == NULL) name = BAD_CAST "unparseable";
9696         if ((line == 0) && (ctxt->node != NULL))
9697             line = ctxt->node->line;
9698         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9699 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9700 		                ctxt->name, line, name);
9701     }
9702 
9703     /*
9704      * SAX: End of Tag
9705      */
9706 done:
9707     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9708 	(!ctxt->disableSAX))
9709 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9710 
9711     spacePop(ctxt);
9712     if (nsNr != 0)
9713 	nsPop(ctxt, nsNr);
9714     return;
9715 }
9716 
9717 /**
9718  * xmlParseCDSect:
9719  * @ctxt:  an XML parser context
9720  *
9721  * Parse escaped pure raw content.
9722  *
9723  * [18] CDSect ::= CDStart CData CDEnd
9724  *
9725  * [19] CDStart ::= '<![CDATA['
9726  *
9727  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9728  *
9729  * [21] CDEnd ::= ']]>'
9730  */
9731 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9732 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9733     xmlChar *buf = NULL;
9734     int len = 0;
9735     int size = XML_PARSER_BUFFER_SIZE;
9736     int r, rl;
9737     int	s, sl;
9738     int cur, l;
9739     int count = 0;
9740 
9741     /* Check 2.6.0 was NXT(0) not RAW */
9742     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9743 	SKIP(9);
9744     } else
9745         return;
9746 
9747     ctxt->instate = XML_PARSER_CDATA_SECTION;
9748     r = CUR_CHAR(rl);
9749     if (!IS_CHAR(r)) {
9750 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9751 	ctxt->instate = XML_PARSER_CONTENT;
9752         return;
9753     }
9754     NEXTL(rl);
9755     s = CUR_CHAR(sl);
9756     if (!IS_CHAR(s)) {
9757 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9758 	ctxt->instate = XML_PARSER_CONTENT;
9759         return;
9760     }
9761     NEXTL(sl);
9762     cur = CUR_CHAR(l);
9763     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9764     if (buf == NULL) {
9765 	xmlErrMemory(ctxt, NULL);
9766 	return;
9767     }
9768     while (IS_CHAR(cur) &&
9769            ((r != ']') || (s != ']') || (cur != '>'))) {
9770 	if (len + 5 >= size) {
9771 	    xmlChar *tmp;
9772 
9773             if ((size > XML_MAX_TEXT_LENGTH) &&
9774                 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9775                 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9776                              "CData section too big found", NULL);
9777                 xmlFree (buf);
9778                 return;
9779             }
9780 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9781 	    if (tmp == NULL) {
9782 	        xmlFree(buf);
9783 		xmlErrMemory(ctxt, NULL);
9784 		return;
9785 	    }
9786 	    buf = tmp;
9787 	    size *= 2;
9788 	}
9789 	COPY_BUF(rl,buf,len,r);
9790 	r = s;
9791 	rl = sl;
9792 	s = cur;
9793 	sl = l;
9794 	count++;
9795 	if (count > 50) {
9796 	    GROW;
9797             if (ctxt->instate == XML_PARSER_EOF) {
9798 		xmlFree(buf);
9799 		return;
9800             }
9801 	    count = 0;
9802 	}
9803 	NEXTL(l);
9804 	cur = CUR_CHAR(l);
9805     }
9806     buf[len] = 0;
9807     ctxt->instate = XML_PARSER_CONTENT;
9808     if (cur != '>') {
9809 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9810 	                     "CData section not finished\n%.50s\n", buf);
9811 	xmlFree(buf);
9812         return;
9813     }
9814     NEXTL(l);
9815 
9816     /*
9817      * OK the buffer is to be consumed as cdata.
9818      */
9819     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9820 	if (ctxt->sax->cdataBlock != NULL)
9821 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9822 	else if (ctxt->sax->characters != NULL)
9823 	    ctxt->sax->characters(ctxt->userData, buf, len);
9824     }
9825     xmlFree(buf);
9826 }
9827 
9828 /**
9829  * xmlParseContent:
9830  * @ctxt:  an XML parser context
9831  *
9832  * Parse a content:
9833  *
9834  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9835  */
9836 
9837 void
xmlParseContent(xmlParserCtxtPtr ctxt)9838 xmlParseContent(xmlParserCtxtPtr ctxt) {
9839     GROW;
9840     while ((RAW != 0) &&
9841 	   ((RAW != '<') || (NXT(1) != '/')) &&
9842 	   (ctxt->instate != XML_PARSER_EOF)) {
9843 	const xmlChar *test = CUR_PTR;
9844 	unsigned int cons = ctxt->input->consumed;
9845 	const xmlChar *cur = ctxt->input->cur;
9846 
9847 	/*
9848 	 * First case : a Processing Instruction.
9849 	 */
9850 	if ((*cur == '<') && (cur[1] == '?')) {
9851 	    xmlParsePI(ctxt);
9852 	}
9853 
9854 	/*
9855 	 * Second case : a CDSection
9856 	 */
9857 	/* 2.6.0 test was *cur not RAW */
9858 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9859 	    xmlParseCDSect(ctxt);
9860 	}
9861 
9862 	/*
9863 	 * Third case :  a comment
9864 	 */
9865 	else if ((*cur == '<') && (NXT(1) == '!') &&
9866 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9867 	    xmlParseComment(ctxt);
9868 	    ctxt->instate = XML_PARSER_CONTENT;
9869 	}
9870 
9871 	/*
9872 	 * Fourth case :  a sub-element.
9873 	 */
9874 	else if (*cur == '<') {
9875 	    xmlParseElement(ctxt);
9876 	}
9877 
9878 	/*
9879 	 * Fifth case : a reference. If if has not been resolved,
9880 	 *    parsing returns it's Name, create the node
9881 	 */
9882 
9883 	else if (*cur == '&') {
9884 	    xmlParseReference(ctxt);
9885 	}
9886 
9887 	/*
9888 	 * Last case, text. Note that References are handled directly.
9889 	 */
9890 	else {
9891 	    xmlParseCharData(ctxt, 0);
9892 	}
9893 
9894 	GROW;
9895 	SHRINK;
9896 
9897 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9898 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9899 	                "detected an error in element content\n");
9900 	    xmlHaltParser(ctxt);
9901             break;
9902 	}
9903     }
9904 }
9905 
9906 /**
9907  * xmlParseElement:
9908  * @ctxt:  an XML parser context
9909  *
9910  * parse an XML element, this is highly recursive
9911  *
9912  * [39] element ::= EmptyElemTag | STag content ETag
9913  *
9914  * [ WFC: Element Type Match ]
9915  * The Name in an element's end-tag must match the element type in the
9916  * start-tag.
9917  *
9918  */
9919 
9920 void
xmlParseElement(xmlParserCtxtPtr ctxt)9921 xmlParseElement(xmlParserCtxtPtr ctxt) {
9922     const xmlChar *name;
9923     const xmlChar *prefix = NULL;
9924     const xmlChar *URI = NULL;
9925     xmlParserNodeInfo node_info;
9926     int line, tlen = 0;
9927     xmlNodePtr ret;
9928     int nsNr = ctxt->nsNr;
9929 
9930     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9931         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9932 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9933 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9934 			  xmlParserMaxDepth);
9935 	xmlHaltParser(ctxt);
9936 	return;
9937     }
9938 
9939     /* Capture start position */
9940     if (ctxt->record_info) {
9941         node_info.begin_pos = ctxt->input->consumed +
9942                           (CUR_PTR - ctxt->input->base);
9943 	node_info.begin_line = ctxt->input->line;
9944     }
9945 
9946     if (ctxt->spaceNr == 0)
9947 	spacePush(ctxt, -1);
9948     else if (*ctxt->space == -2)
9949 	spacePush(ctxt, -1);
9950     else
9951 	spacePush(ctxt, *ctxt->space);
9952 
9953     line = ctxt->input->line;
9954 #ifdef LIBXML_SAX1_ENABLED
9955     if (ctxt->sax2)
9956 #endif /* LIBXML_SAX1_ENABLED */
9957         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9958 #ifdef LIBXML_SAX1_ENABLED
9959     else
9960 	name = xmlParseStartTag(ctxt);
9961 #endif /* LIBXML_SAX1_ENABLED */
9962     if (ctxt->instate == XML_PARSER_EOF)
9963 	return;
9964     if (name == NULL) {
9965 	spacePop(ctxt);
9966         return;
9967     }
9968     namePush(ctxt, name);
9969     ret = ctxt->node;
9970 
9971 #ifdef LIBXML_VALID_ENABLED
9972     /*
9973      * [ VC: Root Element Type ]
9974      * The Name in the document type declaration must match the element
9975      * type of the root element.
9976      */
9977     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9978         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9979         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9980 #endif /* LIBXML_VALID_ENABLED */
9981 
9982     /*
9983      * Check for an Empty Element.
9984      */
9985     if ((RAW == '/') && (NXT(1) == '>')) {
9986         SKIP(2);
9987 	if (ctxt->sax2) {
9988 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9989 		(!ctxt->disableSAX))
9990 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9991 #ifdef LIBXML_SAX1_ENABLED
9992 	} else {
9993 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9994 		(!ctxt->disableSAX))
9995 		ctxt->sax->endElement(ctxt->userData, name);
9996 #endif /* LIBXML_SAX1_ENABLED */
9997 	}
9998 	namePop(ctxt);
9999 	spacePop(ctxt);
10000 	if (nsNr != ctxt->nsNr)
10001 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10002 	if ( ret != NULL && ctxt->record_info ) {
10003 	   node_info.end_pos = ctxt->input->consumed +
10004 			      (CUR_PTR - ctxt->input->base);
10005 	   node_info.end_line = ctxt->input->line;
10006 	   node_info.node = ret;
10007 	   xmlParserAddNodeInfo(ctxt, &node_info);
10008 	}
10009 	return;
10010     }
10011     if (RAW == '>') {
10012         NEXT1;
10013     } else {
10014         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10015 		     "Couldn't find end of Start Tag %s line %d\n",
10016 		                name, line, NULL);
10017 
10018 	/*
10019 	 * end of parsing of this node.
10020 	 */
10021 	nodePop(ctxt);
10022 	namePop(ctxt);
10023 	spacePop(ctxt);
10024 	if (nsNr != ctxt->nsNr)
10025 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10026 
10027 	/*
10028 	 * Capture end position and add node
10029 	 */
10030 	if ( ret != NULL && ctxt->record_info ) {
10031 	   node_info.end_pos = ctxt->input->consumed +
10032 			      (CUR_PTR - ctxt->input->base);
10033 	   node_info.end_line = ctxt->input->line;
10034 	   node_info.node = ret;
10035 	   xmlParserAddNodeInfo(ctxt, &node_info);
10036 	}
10037 	return;
10038     }
10039 
10040     /*
10041      * Parse the content of the element:
10042      */
10043     xmlParseContent(ctxt);
10044     if (ctxt->instate == XML_PARSER_EOF)
10045 	return;
10046     if (!IS_BYTE_CHAR(RAW)) {
10047         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10048 	 "Premature end of data in tag %s line %d\n",
10049 		                name, line, NULL);
10050 
10051 	/*
10052 	 * end of parsing of this node.
10053 	 */
10054 	nodePop(ctxt);
10055 	namePop(ctxt);
10056 	spacePop(ctxt);
10057 	if (nsNr != ctxt->nsNr)
10058 	    nsPop(ctxt, ctxt->nsNr - nsNr);
10059 	return;
10060     }
10061 
10062     /*
10063      * parse the end of tag: '</' should be here.
10064      */
10065     if (ctxt->sax2) {
10066 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10067 	namePop(ctxt);
10068     }
10069 #ifdef LIBXML_SAX1_ENABLED
10070       else
10071 	xmlParseEndTag1(ctxt, line);
10072 #endif /* LIBXML_SAX1_ENABLED */
10073 
10074     /*
10075      * Capture end position and add node
10076      */
10077     if ( ret != NULL && ctxt->record_info ) {
10078        node_info.end_pos = ctxt->input->consumed +
10079                           (CUR_PTR - ctxt->input->base);
10080        node_info.end_line = ctxt->input->line;
10081        node_info.node = ret;
10082        xmlParserAddNodeInfo(ctxt, &node_info);
10083     }
10084 }
10085 
10086 /**
10087  * xmlParseVersionNum:
10088  * @ctxt:  an XML parser context
10089  *
10090  * parse the XML version value.
10091  *
10092  * [26] VersionNum ::= '1.' [0-9]+
10093  *
10094  * In practice allow [0-9].[0-9]+ at that level
10095  *
10096  * Returns the string giving the XML version number, or NULL
10097  */
10098 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10099 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10100     xmlChar *buf = NULL;
10101     int len = 0;
10102     int size = 10;
10103     xmlChar cur;
10104 
10105     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10106     if (buf == NULL) {
10107 	xmlErrMemory(ctxt, NULL);
10108 	return(NULL);
10109     }
10110     cur = CUR;
10111     if (!((cur >= '0') && (cur <= '9'))) {
10112 	xmlFree(buf);
10113 	return(NULL);
10114     }
10115     buf[len++] = cur;
10116     NEXT;
10117     cur=CUR;
10118     if (cur != '.') {
10119 	xmlFree(buf);
10120 	return(NULL);
10121     }
10122     buf[len++] = cur;
10123     NEXT;
10124     cur=CUR;
10125     while ((cur >= '0') && (cur <= '9')) {
10126 	if (len + 1 >= size) {
10127 	    xmlChar *tmp;
10128 
10129 	    size *= 2;
10130 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10131 	    if (tmp == NULL) {
10132 	        xmlFree(buf);
10133 		xmlErrMemory(ctxt, NULL);
10134 		return(NULL);
10135 	    }
10136 	    buf = tmp;
10137 	}
10138 	buf[len++] = cur;
10139 	NEXT;
10140 	cur=CUR;
10141     }
10142     buf[len] = 0;
10143     return(buf);
10144 }
10145 
10146 /**
10147  * xmlParseVersionInfo:
10148  * @ctxt:  an XML parser context
10149  *
10150  * parse the XML version.
10151  *
10152  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10153  *
10154  * [25] Eq ::= S? '=' S?
10155  *
10156  * Returns the version string, e.g. "1.0"
10157  */
10158 
10159 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10160 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10161     xmlChar *version = NULL;
10162 
10163     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10164 	SKIP(7);
10165 	SKIP_BLANKS;
10166 	if (RAW != '=') {
10167 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10168 	    return(NULL);
10169         }
10170 	NEXT;
10171 	SKIP_BLANKS;
10172 	if (RAW == '"') {
10173 	    NEXT;
10174 	    version = xmlParseVersionNum(ctxt);
10175 	    if (RAW != '"') {
10176 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10177 	    } else
10178 	        NEXT;
10179 	} else if (RAW == '\''){
10180 	    NEXT;
10181 	    version = xmlParseVersionNum(ctxt);
10182 	    if (RAW != '\'') {
10183 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10184 	    } else
10185 	        NEXT;
10186 	} else {
10187 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10188 	}
10189     }
10190     return(version);
10191 }
10192 
10193 /**
10194  * xmlParseEncName:
10195  * @ctxt:  an XML parser context
10196  *
10197  * parse the XML encoding name
10198  *
10199  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10200  *
10201  * Returns the encoding name value or NULL
10202  */
10203 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10204 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10205     xmlChar *buf = NULL;
10206     int len = 0;
10207     int size = 10;
10208     xmlChar cur;
10209 
10210     cur = CUR;
10211     if (((cur >= 'a') && (cur <= 'z')) ||
10212         ((cur >= 'A') && (cur <= 'Z'))) {
10213 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10214 	if (buf == NULL) {
10215 	    xmlErrMemory(ctxt, NULL);
10216 	    return(NULL);
10217 	}
10218 
10219 	buf[len++] = cur;
10220 	NEXT;
10221 	cur = CUR;
10222 	while (((cur >= 'a') && (cur <= 'z')) ||
10223 	       ((cur >= 'A') && (cur <= 'Z')) ||
10224 	       ((cur >= '0') && (cur <= '9')) ||
10225 	       (cur == '.') || (cur == '_') ||
10226 	       (cur == '-')) {
10227 	    if (len + 1 >= size) {
10228 	        xmlChar *tmp;
10229 
10230 		size *= 2;
10231 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10232 		if (tmp == NULL) {
10233 		    xmlErrMemory(ctxt, NULL);
10234 		    xmlFree(buf);
10235 		    return(NULL);
10236 		}
10237 		buf = tmp;
10238 	    }
10239 	    buf[len++] = cur;
10240 	    NEXT;
10241 	    cur = CUR;
10242 	    if (cur == 0) {
10243 	        SHRINK;
10244 		GROW;
10245 		cur = CUR;
10246 	    }
10247         }
10248 	buf[len] = 0;
10249     } else {
10250 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10251     }
10252     return(buf);
10253 }
10254 
10255 /**
10256  * xmlParseEncodingDecl:
10257  * @ctxt:  an XML parser context
10258  *
10259  * parse the XML encoding declaration
10260  *
10261  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10262  *
10263  * this setups the conversion filters.
10264  *
10265  * Returns the encoding value or NULL
10266  */
10267 
10268 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10269 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10270     xmlChar *encoding = NULL;
10271 
10272     SKIP_BLANKS;
10273     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10274 	SKIP(8);
10275 	SKIP_BLANKS;
10276 	if (RAW != '=') {
10277 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10278 	    return(NULL);
10279         }
10280 	NEXT;
10281 	SKIP_BLANKS;
10282 	if (RAW == '"') {
10283 	    NEXT;
10284 	    encoding = xmlParseEncName(ctxt);
10285 	    if (RAW != '"') {
10286 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10287 		xmlFree((xmlChar *) encoding);
10288 		return(NULL);
10289 	    } else
10290 	        NEXT;
10291 	} else if (RAW == '\''){
10292 	    NEXT;
10293 	    encoding = xmlParseEncName(ctxt);
10294 	    if (RAW != '\'') {
10295 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10296 		xmlFree((xmlChar *) encoding);
10297 		return(NULL);
10298 	    } else
10299 	        NEXT;
10300 	} else {
10301 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10302 	}
10303 
10304         /*
10305          * Non standard parsing, allowing the user to ignore encoding
10306          */
10307         if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10308 	    xmlFree((xmlChar *) encoding);
10309             return(NULL);
10310 	}
10311 
10312 	/*
10313 	 * UTF-16 encoding stwich has already taken place at this stage,
10314 	 * more over the little-endian/big-endian selection is already done
10315 	 */
10316         if ((encoding != NULL) &&
10317 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10318 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10319 	    /*
10320 	     * If no encoding was passed to the parser, that we are
10321 	     * using UTF-16 and no decoder is present i.e. the
10322 	     * document is apparently UTF-8 compatible, then raise an
10323 	     * encoding mismatch fatal error
10324 	     */
10325 	    if ((ctxt->encoding == NULL) &&
10326 	        (ctxt->input->buf != NULL) &&
10327 	        (ctxt->input->buf->encoder == NULL)) {
10328 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10329 		  "Document labelled UTF-16 but has UTF-8 content\n");
10330 	    }
10331 	    if (ctxt->encoding != NULL)
10332 		xmlFree((xmlChar *) ctxt->encoding);
10333 	    ctxt->encoding = encoding;
10334 	}
10335 	/*
10336 	 * UTF-8 encoding is handled natively
10337 	 */
10338         else if ((encoding != NULL) &&
10339 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10340 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10341 	    if (ctxt->encoding != NULL)
10342 		xmlFree((xmlChar *) ctxt->encoding);
10343 	    ctxt->encoding = encoding;
10344 	}
10345 	else if (encoding != NULL) {
10346 	    xmlCharEncodingHandlerPtr handler;
10347 
10348 	    if (ctxt->input->encoding != NULL)
10349 		xmlFree((xmlChar *) ctxt->input->encoding);
10350 	    ctxt->input->encoding = encoding;
10351 
10352             handler = xmlFindCharEncodingHandler((const char *) encoding);
10353 	    if (handler != NULL) {
10354 		if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10355 		    /* failed to convert */
10356 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10357 		    return(NULL);
10358 		}
10359 	    } else {
10360 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10361 			"Unsupported encoding %s\n", encoding);
10362 		return(NULL);
10363 	    }
10364 	}
10365     }
10366     return(encoding);
10367 }
10368 
10369 /**
10370  * xmlParseSDDecl:
10371  * @ctxt:  an XML parser context
10372  *
10373  * parse the XML standalone declaration
10374  *
10375  * [32] SDDecl ::= S 'standalone' Eq
10376  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10377  *
10378  * [ VC: Standalone Document Declaration ]
10379  * TODO The standalone document declaration must have the value "no"
10380  * if any external markup declarations contain declarations of:
10381  *  - attributes with default values, if elements to which these
10382  *    attributes apply appear in the document without specifications
10383  *    of values for these attributes, or
10384  *  - entities (other than amp, lt, gt, apos, quot), if references
10385  *    to those entities appear in the document, or
10386  *  - attributes with values subject to normalization, where the
10387  *    attribute appears in the document with a value which will change
10388  *    as a result of normalization, or
10389  *  - element types with element content, if white space occurs directly
10390  *    within any instance of those types.
10391  *
10392  * Returns:
10393  *   1 if standalone="yes"
10394  *   0 if standalone="no"
10395  *  -2 if standalone attribute is missing or invalid
10396  *	  (A standalone value of -2 means that the XML declaration was found,
10397  *	   but no value was specified for the standalone attribute).
10398  */
10399 
10400 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10401 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10402     int standalone = -2;
10403 
10404     SKIP_BLANKS;
10405     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10406 	SKIP(10);
10407         SKIP_BLANKS;
10408 	if (RAW != '=') {
10409 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10410 	    return(standalone);
10411         }
10412 	NEXT;
10413 	SKIP_BLANKS;
10414         if (RAW == '\''){
10415 	    NEXT;
10416 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10417 	        standalone = 0;
10418                 SKIP(2);
10419 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10420 	               (NXT(2) == 's')) {
10421 	        standalone = 1;
10422 		SKIP(3);
10423             } else {
10424 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10425 	    }
10426 	    if (RAW != '\'') {
10427 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10428 	    } else
10429 	        NEXT;
10430 	} else if (RAW == '"'){
10431 	    NEXT;
10432 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10433 	        standalone = 0;
10434 		SKIP(2);
10435 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10436 	               (NXT(2) == 's')) {
10437 	        standalone = 1;
10438                 SKIP(3);
10439             } else {
10440 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10441 	    }
10442 	    if (RAW != '"') {
10443 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10444 	    } else
10445 	        NEXT;
10446 	} else {
10447 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10448         }
10449     }
10450     return(standalone);
10451 }
10452 
10453 /**
10454  * xmlParseXMLDecl:
10455  * @ctxt:  an XML parser context
10456  *
10457  * parse an XML declaration header
10458  *
10459  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10460  */
10461 
10462 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10463 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10464     xmlChar *version;
10465 
10466     /*
10467      * This value for standalone indicates that the document has an
10468      * XML declaration but it does not have a standalone attribute.
10469      * It will be overwritten later if a standalone attribute is found.
10470      */
10471     ctxt->input->standalone = -2;
10472 
10473     /*
10474      * We know that '<?xml' is here.
10475      */
10476     SKIP(5);
10477 
10478     if (!IS_BLANK_CH(RAW)) {
10479 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10480 	               "Blank needed after '<?xml'\n");
10481     }
10482     SKIP_BLANKS;
10483 
10484     /*
10485      * We must have the VersionInfo here.
10486      */
10487     version = xmlParseVersionInfo(ctxt);
10488     if (version == NULL) {
10489 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10490     } else {
10491 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10492 	    /*
10493 	     * Changed here for XML-1.0 5th edition
10494 	     */
10495 	    if (ctxt->options & XML_PARSE_OLD10) {
10496 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10497 			          "Unsupported version '%s'\n",
10498 			          version);
10499 	    } else {
10500 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10501 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10502 		                  "Unsupported version '%s'\n",
10503 				  version, NULL);
10504 		} else {
10505 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10506 				      "Unsupported version '%s'\n",
10507 				      version);
10508 		}
10509 	    }
10510 	}
10511 	if (ctxt->version != NULL)
10512 	    xmlFree((void *) ctxt->version);
10513 	ctxt->version = version;
10514     }
10515 
10516     /*
10517      * We may have the encoding declaration
10518      */
10519     if (!IS_BLANK_CH(RAW)) {
10520         if ((RAW == '?') && (NXT(1) == '>')) {
10521 	    SKIP(2);
10522 	    return;
10523 	}
10524 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10525     }
10526     xmlParseEncodingDecl(ctxt);
10527     if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10528          (ctxt->instate == XML_PARSER_EOF)) {
10529 	/*
10530 	 * The XML REC instructs us to stop parsing right here
10531 	 */
10532         return;
10533     }
10534 
10535     /*
10536      * We may have the standalone status.
10537      */
10538     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10539         if ((RAW == '?') && (NXT(1) == '>')) {
10540 	    SKIP(2);
10541 	    return;
10542 	}
10543 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10544     }
10545 
10546     /*
10547      * We can grow the input buffer freely at that point
10548      */
10549     GROW;
10550 
10551     SKIP_BLANKS;
10552     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10553 
10554     SKIP_BLANKS;
10555     if ((RAW == '?') && (NXT(1) == '>')) {
10556         SKIP(2);
10557     } else if (RAW == '>') {
10558         /* Deprecated old WD ... */
10559 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10560 	NEXT;
10561     } else {
10562 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10563 	MOVETO_ENDTAG(CUR_PTR);
10564 	NEXT;
10565     }
10566 }
10567 
10568 /**
10569  * xmlParseMisc:
10570  * @ctxt:  an XML parser context
10571  *
10572  * parse an XML Misc* optional field.
10573  *
10574  * [27] Misc ::= Comment | PI |  S
10575  */
10576 
10577 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10578 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10579     while ((ctxt->instate != XML_PARSER_EOF) &&
10580            (((RAW == '<') && (NXT(1) == '?')) ||
10581             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10582             IS_BLANK_CH(CUR))) {
10583         if ((RAW == '<') && (NXT(1) == '?')) {
10584 	    xmlParsePI(ctxt);
10585 	} else if (IS_BLANK_CH(CUR)) {
10586 	    NEXT;
10587 	} else
10588 	    xmlParseComment(ctxt);
10589     }
10590 }
10591 
10592 /**
10593  * xmlParseDocument:
10594  * @ctxt:  an XML parser context
10595  *
10596  * parse an XML document (and build a tree if using the standard SAX
10597  * interface).
10598  *
10599  * [1] document ::= prolog element Misc*
10600  *
10601  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10602  *
10603  * Returns 0, -1 in case of error. the parser context is augmented
10604  *                as a result of the parsing.
10605  */
10606 
10607 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10608 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10609     xmlChar start[4];
10610     xmlCharEncoding enc;
10611 
10612     xmlInitParser();
10613 
10614     if ((ctxt == NULL) || (ctxt->input == NULL))
10615         return(-1);
10616 
10617     GROW;
10618 
10619     /*
10620      * SAX: detecting the level.
10621      */
10622     xmlDetectSAX2(ctxt);
10623 
10624     /*
10625      * SAX: beginning of the document processing.
10626      */
10627     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10629     if (ctxt->instate == XML_PARSER_EOF)
10630 	return(-1);
10631 
10632     if ((ctxt->encoding == NULL) &&
10633         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10634 	/*
10635 	 * Get the 4 first bytes and decode the charset
10636 	 * if enc != XML_CHAR_ENCODING_NONE
10637 	 * plug some encoding conversion routines.
10638 	 */
10639 	start[0] = RAW;
10640 	start[1] = NXT(1);
10641 	start[2] = NXT(2);
10642 	start[3] = NXT(3);
10643 	enc = xmlDetectCharEncoding(&start[0], 4);
10644 	if (enc != XML_CHAR_ENCODING_NONE) {
10645 	    xmlSwitchEncoding(ctxt, enc);
10646 	}
10647     }
10648 
10649 
10650     if (CUR == 0) {
10651 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10652 	return(-1);
10653     }
10654 
10655     /*
10656      * Check for the XMLDecl in the Prolog.
10657      * do not GROW here to avoid the detected encoder to decode more
10658      * than just the first line, unless the amount of data is really
10659      * too small to hold "<?xml version="1.0" encoding="foo"
10660      */
10661     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10662        GROW;
10663     }
10664     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10665 
10666 	/*
10667 	 * Note that we will switch encoding on the fly.
10668 	 */
10669 	xmlParseXMLDecl(ctxt);
10670 	if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10671 	    (ctxt->instate == XML_PARSER_EOF)) {
10672 	    /*
10673 	     * The XML REC instructs us to stop parsing right here
10674 	     */
10675 	    return(-1);
10676 	}
10677 	ctxt->standalone = ctxt->input->standalone;
10678 	SKIP_BLANKS;
10679     } else {
10680 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10681     }
10682     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10683         ctxt->sax->startDocument(ctxt->userData);
10684     if (ctxt->instate == XML_PARSER_EOF)
10685 	return(-1);
10686     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10687         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10688 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10689     }
10690 
10691     /*
10692      * The Misc part of the Prolog
10693      */
10694     GROW;
10695     xmlParseMisc(ctxt);
10696 
10697     /*
10698      * Then possibly doc type declaration(s) and more Misc
10699      * (doctypedecl Misc*)?
10700      */
10701     GROW;
10702     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10703 
10704 	ctxt->inSubset = 1;
10705 	xmlParseDocTypeDecl(ctxt);
10706 	if (RAW == '[') {
10707 	    ctxt->instate = XML_PARSER_DTD;
10708 	    xmlParseInternalSubset(ctxt);
10709 	    if (ctxt->instate == XML_PARSER_EOF)
10710 		return(-1);
10711 	}
10712 
10713 	/*
10714 	 * Create and update the external subset.
10715 	 */
10716 	ctxt->inSubset = 2;
10717 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10718 	    (!ctxt->disableSAX))
10719 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10720 	                              ctxt->extSubSystem, ctxt->extSubURI);
10721 	if (ctxt->instate == XML_PARSER_EOF)
10722 	    return(-1);
10723 	ctxt->inSubset = 0;
10724 
10725         xmlCleanSpecialAttr(ctxt);
10726 
10727 	ctxt->instate = XML_PARSER_PROLOG;
10728 	xmlParseMisc(ctxt);
10729     }
10730 
10731     /*
10732      * Time to start parsing the tree itself
10733      */
10734     GROW;
10735     if (RAW != '<') {
10736 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737 		       "Start tag expected, '<' not found\n");
10738     } else {
10739 	ctxt->instate = XML_PARSER_CONTENT;
10740 	xmlParseElement(ctxt);
10741 	ctxt->instate = XML_PARSER_EPILOG;
10742 
10743 
10744 	/*
10745 	 * The Misc part at the end
10746 	 */
10747 	xmlParseMisc(ctxt);
10748 
10749 	if (RAW != 0) {
10750 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10751 	}
10752 	ctxt->instate = XML_PARSER_EOF;
10753     }
10754 
10755     /*
10756      * SAX: end of the document processing.
10757      */
10758     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10759         ctxt->sax->endDocument(ctxt->userData);
10760 
10761     /*
10762      * Remove locally kept entity definitions if the tree was not built
10763      */
10764     if ((ctxt->myDoc != NULL) &&
10765 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10766 	xmlFreeDoc(ctxt->myDoc);
10767 	ctxt->myDoc = NULL;
10768     }
10769 
10770     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10771         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10772 	if (ctxt->valid)
10773 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10774 	if (ctxt->nsWellFormed)
10775 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10776 	if (ctxt->options & XML_PARSE_OLD10)
10777 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10778     }
10779     if (! ctxt->wellFormed) {
10780 	ctxt->valid = 0;
10781 	return(-1);
10782     }
10783     return(0);
10784 }
10785 
10786 /**
10787  * xmlParseExtParsedEnt:
10788  * @ctxt:  an XML parser context
10789  *
10790  * parse a general parsed entity
10791  * An external general parsed entity is well-formed if it matches the
10792  * production labeled extParsedEnt.
10793  *
10794  * [78] extParsedEnt ::= TextDecl? content
10795  *
10796  * Returns 0, -1 in case of error. the parser context is augmented
10797  *                as a result of the parsing.
10798  */
10799 
10800 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10801 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10802     xmlChar start[4];
10803     xmlCharEncoding enc;
10804 
10805     if ((ctxt == NULL) || (ctxt->input == NULL))
10806         return(-1);
10807 
10808     xmlDefaultSAXHandlerInit();
10809 
10810     xmlDetectSAX2(ctxt);
10811 
10812     GROW;
10813 
10814     /*
10815      * SAX: beginning of the document processing.
10816      */
10817     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10818         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10819 
10820     /*
10821      * Get the 4 first bytes and decode the charset
10822      * if enc != XML_CHAR_ENCODING_NONE
10823      * plug some encoding conversion routines.
10824      */
10825     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10826 	start[0] = RAW;
10827 	start[1] = NXT(1);
10828 	start[2] = NXT(2);
10829 	start[3] = NXT(3);
10830 	enc = xmlDetectCharEncoding(start, 4);
10831 	if (enc != XML_CHAR_ENCODING_NONE) {
10832 	    xmlSwitchEncoding(ctxt, enc);
10833 	}
10834     }
10835 
10836 
10837     if (CUR == 0) {
10838 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10839     }
10840 
10841     /*
10842      * Check for the XMLDecl in the Prolog.
10843      */
10844     GROW;
10845     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10846 
10847 	/*
10848 	 * Note that we will switch encoding on the fly.
10849 	 */
10850 	xmlParseXMLDecl(ctxt);
10851 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10852 	    /*
10853 	     * The XML REC instructs us to stop parsing right here
10854 	     */
10855 	    return(-1);
10856 	}
10857 	SKIP_BLANKS;
10858     } else {
10859 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10860     }
10861     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10862         ctxt->sax->startDocument(ctxt->userData);
10863     if (ctxt->instate == XML_PARSER_EOF)
10864 	return(-1);
10865 
10866     /*
10867      * Doing validity checking on chunk doesn't make sense
10868      */
10869     ctxt->instate = XML_PARSER_CONTENT;
10870     ctxt->validate = 0;
10871     ctxt->loadsubset = 0;
10872     ctxt->depth = 0;
10873 
10874     xmlParseContent(ctxt);
10875     if (ctxt->instate == XML_PARSER_EOF)
10876 	return(-1);
10877 
10878     if ((RAW == '<') && (NXT(1) == '/')) {
10879 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10880     } else if (RAW != 0) {
10881 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10882     }
10883 
10884     /*
10885      * SAX: end of the document processing.
10886      */
10887     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10888         ctxt->sax->endDocument(ctxt->userData);
10889 
10890     if (! ctxt->wellFormed) return(-1);
10891     return(0);
10892 }
10893 
10894 #ifdef LIBXML_PUSH_ENABLED
10895 /************************************************************************
10896  *									*
10897  *		Progressive parsing interfaces				*
10898  *									*
10899  ************************************************************************/
10900 
10901 /**
10902  * xmlParseLookupSequence:
10903  * @ctxt:  an XML parser context
10904  * @first:  the first char to lookup
10905  * @next:  the next char to lookup or zero
10906  * @third:  the next char to lookup or zero
10907  *
10908  * Try to find if a sequence (first, next, third) or  just (first next) or
10909  * (first) is available in the input stream.
10910  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10911  * to avoid rescanning sequences of bytes, it DOES change the state of the
10912  * parser, do not use liberally.
10913  *
10914  * Returns the index to the current parsing point if the full sequence
10915  *      is available, -1 otherwise.
10916  */
10917 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10918 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10919                        xmlChar next, xmlChar third) {
10920     int base, len;
10921     xmlParserInputPtr in;
10922     const xmlChar *buf;
10923 
10924     in = ctxt->input;
10925     if (in == NULL) return(-1);
10926     base = in->cur - in->base;
10927     if (base < 0) return(-1);
10928     if (ctxt->checkIndex > base)
10929         base = ctxt->checkIndex;
10930     if (in->buf == NULL) {
10931 	buf = in->base;
10932 	len = in->length;
10933     } else {
10934 	buf = xmlBufContent(in->buf->buffer);
10935 	len = xmlBufUse(in->buf->buffer);
10936     }
10937     /* take into account the sequence length */
10938     if (third) len -= 2;
10939     else if (next) len --;
10940     for (;base < len;base++) {
10941         if (buf[base] == first) {
10942 	    if (third != 0) {
10943 		if ((buf[base + 1] != next) ||
10944 		    (buf[base + 2] != third)) continue;
10945 	    } else if (next != 0) {
10946 		if (buf[base + 1] != next) continue;
10947 	    }
10948 	    ctxt->checkIndex = 0;
10949 #ifdef DEBUG_PUSH
10950 	    if (next == 0)
10951 		xmlGenericError(xmlGenericErrorContext,
10952 			"PP: lookup '%c' found at %d\n",
10953 			first, base);
10954 	    else if (third == 0)
10955 		xmlGenericError(xmlGenericErrorContext,
10956 			"PP: lookup '%c%c' found at %d\n",
10957 			first, next, base);
10958 	    else
10959 		xmlGenericError(xmlGenericErrorContext,
10960 			"PP: lookup '%c%c%c' found at %d\n",
10961 			first, next, third, base);
10962 #endif
10963 	    return(base - (in->cur - in->base));
10964 	}
10965     }
10966     ctxt->checkIndex = base;
10967 #ifdef DEBUG_PUSH
10968     if (next == 0)
10969 	xmlGenericError(xmlGenericErrorContext,
10970 		"PP: lookup '%c' failed\n", first);
10971     else if (third == 0)
10972 	xmlGenericError(xmlGenericErrorContext,
10973 		"PP: lookup '%c%c' failed\n", first, next);
10974     else
10975 	xmlGenericError(xmlGenericErrorContext,
10976 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10977 #endif
10978     return(-1);
10979 }
10980 
10981 /**
10982  * xmlParseGetLasts:
10983  * @ctxt:  an XML parser context
10984  * @lastlt:  pointer to store the last '<' from the input
10985  * @lastgt:  pointer to store the last '>' from the input
10986  *
10987  * Lookup the last < and > in the current chunk
10988  */
10989 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10990 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10991                  const xmlChar **lastgt) {
10992     const xmlChar *tmp;
10993 
10994     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10995 	xmlGenericError(xmlGenericErrorContext,
10996 		    "Internal error: xmlParseGetLasts\n");
10997 	return;
10998     }
10999     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11000         tmp = ctxt->input->end;
11001 	tmp--;
11002 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11003 	if (tmp < ctxt->input->base) {
11004 	    *lastlt = NULL;
11005 	    *lastgt = NULL;
11006 	} else {
11007 	    *lastlt = tmp;
11008 	    tmp++;
11009 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11010 	        if (*tmp == '\'') {
11011 		    tmp++;
11012 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11013 		    if (tmp < ctxt->input->end) tmp++;
11014 		} else if (*tmp == '"') {
11015 		    tmp++;
11016 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11017 		    if (tmp < ctxt->input->end) tmp++;
11018 		} else
11019 		    tmp++;
11020 	    }
11021 	    if (tmp < ctxt->input->end)
11022 	        *lastgt = tmp;
11023 	    else {
11024 	        tmp = *lastlt;
11025 		tmp--;
11026 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11027 		if (tmp >= ctxt->input->base)
11028 		    *lastgt = tmp;
11029 		else
11030 		    *lastgt = NULL;
11031 	    }
11032 	}
11033     } else {
11034         *lastlt = NULL;
11035 	*lastgt = NULL;
11036     }
11037 }
11038 /**
11039  * xmlCheckCdataPush:
11040  * @cur: pointer to the block of characters
11041  * @len: length of the block in bytes
11042  * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11043  *
11044  * Check that the block of characters is okay as SCdata content [20]
11045  *
11046  * Returns the number of bytes to pass if okay, a negative index where an
11047  *         UTF-8 error occurred otherwise
11048  */
11049 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11050 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11051     int ix;
11052     unsigned char c;
11053     int codepoint;
11054 
11055     if ((utf == NULL) || (len <= 0))
11056         return(0);
11057 
11058     for (ix = 0; ix < len;) {      /* string is 0-terminated */
11059         c = utf[ix];
11060         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
11061 	    if (c >= 0x20)
11062 		ix++;
11063 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11064 	        ix++;
11065 	    else
11066 	        return(-ix);
11067 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11068 	    if (ix + 2 > len) return(complete ? -ix : ix);
11069 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
11070 	        return(-ix);
11071 	    codepoint = (utf[ix] & 0x1f) << 6;
11072 	    codepoint |= utf[ix+1] & 0x3f;
11073 	    if (!xmlIsCharQ(codepoint))
11074 	        return(-ix);
11075 	    ix += 2;
11076 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11077 	    if (ix + 3 > len) return(complete ? -ix : ix);
11078 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11079 	        ((utf[ix+2] & 0xc0) != 0x80))
11080 		    return(-ix);
11081 	    codepoint = (utf[ix] & 0xf) << 12;
11082 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
11083 	    codepoint |= utf[ix+2] & 0x3f;
11084 	    if (!xmlIsCharQ(codepoint))
11085 	        return(-ix);
11086 	    ix += 3;
11087 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11088 	    if (ix + 4 > len) return(complete ? -ix : ix);
11089 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
11090 	        ((utf[ix+2] & 0xc0) != 0x80) ||
11091 		((utf[ix+3] & 0xc0) != 0x80))
11092 		    return(-ix);
11093 	    codepoint = (utf[ix] & 0x7) << 18;
11094 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
11095 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
11096 	    codepoint |= utf[ix+3] & 0x3f;
11097 	    if (!xmlIsCharQ(codepoint))
11098 	        return(-ix);
11099 	    ix += 4;
11100 	} else				/* unknown encoding */
11101 	    return(-ix);
11102       }
11103       return(ix);
11104 }
11105 
11106 /**
11107  * xmlParseTryOrFinish:
11108  * @ctxt:  an XML parser context
11109  * @terminate:  last chunk indicator
11110  *
11111  * Try to progress on parsing
11112  *
11113  * Returns zero if no parsing was possible
11114  */
11115 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11116 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11117     int ret = 0;
11118     int avail, tlen;
11119     xmlChar cur, next;
11120     const xmlChar *lastlt, *lastgt;
11121 
11122     if (ctxt->input == NULL)
11123         return(0);
11124 
11125 #ifdef DEBUG_PUSH
11126     switch (ctxt->instate) {
11127 	case XML_PARSER_EOF:
11128 	    xmlGenericError(xmlGenericErrorContext,
11129 		    "PP: try EOF\n"); break;
11130 	case XML_PARSER_START:
11131 	    xmlGenericError(xmlGenericErrorContext,
11132 		    "PP: try START\n"); break;
11133 	case XML_PARSER_MISC:
11134 	    xmlGenericError(xmlGenericErrorContext,
11135 		    "PP: try MISC\n");break;
11136 	case XML_PARSER_COMMENT:
11137 	    xmlGenericError(xmlGenericErrorContext,
11138 		    "PP: try COMMENT\n");break;
11139 	case XML_PARSER_PROLOG:
11140 	    xmlGenericError(xmlGenericErrorContext,
11141 		    "PP: try PROLOG\n");break;
11142 	case XML_PARSER_START_TAG:
11143 	    xmlGenericError(xmlGenericErrorContext,
11144 		    "PP: try START_TAG\n");break;
11145 	case XML_PARSER_CONTENT:
11146 	    xmlGenericError(xmlGenericErrorContext,
11147 		    "PP: try CONTENT\n");break;
11148 	case XML_PARSER_CDATA_SECTION:
11149 	    xmlGenericError(xmlGenericErrorContext,
11150 		    "PP: try CDATA_SECTION\n");break;
11151 	case XML_PARSER_END_TAG:
11152 	    xmlGenericError(xmlGenericErrorContext,
11153 		    "PP: try END_TAG\n");break;
11154 	case XML_PARSER_ENTITY_DECL:
11155 	    xmlGenericError(xmlGenericErrorContext,
11156 		    "PP: try ENTITY_DECL\n");break;
11157 	case XML_PARSER_ENTITY_VALUE:
11158 	    xmlGenericError(xmlGenericErrorContext,
11159 		    "PP: try ENTITY_VALUE\n");break;
11160 	case XML_PARSER_ATTRIBUTE_VALUE:
11161 	    xmlGenericError(xmlGenericErrorContext,
11162 		    "PP: try ATTRIBUTE_VALUE\n");break;
11163 	case XML_PARSER_DTD:
11164 	    xmlGenericError(xmlGenericErrorContext,
11165 		    "PP: try DTD\n");break;
11166 	case XML_PARSER_EPILOG:
11167 	    xmlGenericError(xmlGenericErrorContext,
11168 		    "PP: try EPILOG\n");break;
11169 	case XML_PARSER_PI:
11170 	    xmlGenericError(xmlGenericErrorContext,
11171 		    "PP: try PI\n");break;
11172         case XML_PARSER_IGNORE:
11173             xmlGenericError(xmlGenericErrorContext,
11174 		    "PP: try IGNORE\n");break;
11175     }
11176 #endif
11177 
11178     if ((ctxt->input != NULL) &&
11179         (ctxt->input->cur - ctxt->input->base > 4096)) {
11180 	xmlSHRINK(ctxt);
11181 	ctxt->checkIndex = 0;
11182     }
11183     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11184 
11185     while (ctxt->instate != XML_PARSER_EOF) {
11186 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11187 	    return(0);
11188 
11189 	if (ctxt->input == NULL) break;
11190 	if (ctxt->input->buf == NULL)
11191 	    avail = ctxt->input->length -
11192 	            (ctxt->input->cur - ctxt->input->base);
11193 	else {
11194 	    /*
11195 	     * If we are operating on converted input, try to flush
11196 	     * remainng chars to avoid them stalling in the non-converted
11197 	     * buffer. But do not do this in document start where
11198 	     * encoding="..." may not have been read and we work on a
11199 	     * guessed encoding.
11200 	     */
11201 	    if ((ctxt->instate != XML_PARSER_START) &&
11202 	        (ctxt->input->buf->raw != NULL) &&
11203 		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11204                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11205                                                  ctxt->input);
11206 		size_t current = ctxt->input->cur - ctxt->input->base;
11207 
11208 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11209                 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11210                                       base, current);
11211 	    }
11212 	    avail = xmlBufUse(ctxt->input->buf->buffer) -
11213 		    (ctxt->input->cur - ctxt->input->base);
11214 	}
11215         if (avail < 1)
11216 	    goto done;
11217         switch (ctxt->instate) {
11218             case XML_PARSER_EOF:
11219 	        /*
11220 		 * Document parsing is done !
11221 		 */
11222 	        goto done;
11223             case XML_PARSER_START:
11224 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11225 		    xmlChar start[4];
11226 		    xmlCharEncoding enc;
11227 
11228 		    /*
11229 		     * Very first chars read from the document flow.
11230 		     */
11231 		    if (avail < 4)
11232 			goto done;
11233 
11234 		    /*
11235 		     * Get the 4 first bytes and decode the charset
11236 		     * if enc != XML_CHAR_ENCODING_NONE
11237 		     * plug some encoding conversion routines,
11238 		     * else xmlSwitchEncoding will set to (default)
11239 		     * UTF8.
11240 		     */
11241 		    start[0] = RAW;
11242 		    start[1] = NXT(1);
11243 		    start[2] = NXT(2);
11244 		    start[3] = NXT(3);
11245 		    enc = xmlDetectCharEncoding(start, 4);
11246 		    xmlSwitchEncoding(ctxt, enc);
11247 		    break;
11248 		}
11249 
11250 		if (avail < 2)
11251 		    goto done;
11252 		cur = ctxt->input->cur[0];
11253 		next = ctxt->input->cur[1];
11254 		if (cur == 0) {
11255 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256 			ctxt->sax->setDocumentLocator(ctxt->userData,
11257 						      &xmlDefaultSAXLocator);
11258 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11259 		    xmlHaltParser(ctxt);
11260 #ifdef DEBUG_PUSH
11261 		    xmlGenericError(xmlGenericErrorContext,
11262 			    "PP: entering EOF\n");
11263 #endif
11264 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265 			ctxt->sax->endDocument(ctxt->userData);
11266 		    goto done;
11267 		}
11268 	        if ((cur == '<') && (next == '?')) {
11269 		    /* PI or XML decl */
11270 		    if (avail < 5) return(ret);
11271 		    if ((!terminate) &&
11272 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11273 			return(ret);
11274 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275 			ctxt->sax->setDocumentLocator(ctxt->userData,
11276 						      &xmlDefaultSAXLocator);
11277 		    if ((ctxt->input->cur[2] == 'x') &&
11278 			(ctxt->input->cur[3] == 'm') &&
11279 			(ctxt->input->cur[4] == 'l') &&
11280 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11281 			ret += 5;
11282 #ifdef DEBUG_PUSH
11283 			xmlGenericError(xmlGenericErrorContext,
11284 				"PP: Parsing XML Decl\n");
11285 #endif
11286 			xmlParseXMLDecl(ctxt);
11287 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11288 			    /*
11289 			     * The XML REC instructs us to stop parsing right
11290 			     * here
11291 			     */
11292 			    xmlHaltParser(ctxt);
11293 			    return(0);
11294 			}
11295 			ctxt->standalone = ctxt->input->standalone;
11296 			if ((ctxt->encoding == NULL) &&
11297 			    (ctxt->input->encoding != NULL))
11298 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300 			    (!ctxt->disableSAX))
11301 			    ctxt->sax->startDocument(ctxt->userData);
11302 			ctxt->instate = XML_PARSER_MISC;
11303 #ifdef DEBUG_PUSH
11304 			xmlGenericError(xmlGenericErrorContext,
11305 				"PP: entering MISC\n");
11306 #endif
11307 		    } else {
11308 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310 			    (!ctxt->disableSAX))
11311 			    ctxt->sax->startDocument(ctxt->userData);
11312 			ctxt->instate = XML_PARSER_MISC;
11313 #ifdef DEBUG_PUSH
11314 			xmlGenericError(xmlGenericErrorContext,
11315 				"PP: entering MISC\n");
11316 #endif
11317 		    }
11318 		} else {
11319 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320 			ctxt->sax->setDocumentLocator(ctxt->userData,
11321 						      &xmlDefaultSAXLocator);
11322 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323 		    if (ctxt->version == NULL) {
11324 		        xmlErrMemory(ctxt, NULL);
11325 			break;
11326 		    }
11327 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328 		        (!ctxt->disableSAX))
11329 			ctxt->sax->startDocument(ctxt->userData);
11330 		    ctxt->instate = XML_PARSER_MISC;
11331 #ifdef DEBUG_PUSH
11332 		    xmlGenericError(xmlGenericErrorContext,
11333 			    "PP: entering MISC\n");
11334 #endif
11335 		}
11336 		break;
11337             case XML_PARSER_START_TAG: {
11338 	        const xmlChar *name;
11339 		const xmlChar *prefix = NULL;
11340 		const xmlChar *URI = NULL;
11341 		int nsNr = ctxt->nsNr;
11342 
11343 		if ((avail < 2) && (ctxt->inputNr == 1))
11344 		    goto done;
11345 		cur = ctxt->input->cur[0];
11346 	        if (cur != '<') {
11347 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11348 		    xmlHaltParser(ctxt);
11349 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350 			ctxt->sax->endDocument(ctxt->userData);
11351 		    goto done;
11352 		}
11353 		if (!terminate) {
11354 		    if (ctxt->progressive) {
11355 		        /* > can be found unescaped in attribute values */
11356 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11357 			    goto done;
11358 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11359 			goto done;
11360 		    }
11361 		}
11362 		if (ctxt->spaceNr == 0)
11363 		    spacePush(ctxt, -1);
11364 		else if (*ctxt->space == -2)
11365 		    spacePush(ctxt, -1);
11366 		else
11367 		    spacePush(ctxt, *ctxt->space);
11368 #ifdef LIBXML_SAX1_ENABLED
11369 		if (ctxt->sax2)
11370 #endif /* LIBXML_SAX1_ENABLED */
11371 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11372 #ifdef LIBXML_SAX1_ENABLED
11373 		else
11374 		    name = xmlParseStartTag(ctxt);
11375 #endif /* LIBXML_SAX1_ENABLED */
11376 		if (ctxt->instate == XML_PARSER_EOF)
11377 		    goto done;
11378 		if (name == NULL) {
11379 		    spacePop(ctxt);
11380 		    xmlHaltParser(ctxt);
11381 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382 			ctxt->sax->endDocument(ctxt->userData);
11383 		    goto done;
11384 		}
11385 #ifdef LIBXML_VALID_ENABLED
11386 		/*
11387 		 * [ VC: Root Element Type ]
11388 		 * The Name in the document type declaration must match
11389 		 * the element type of the root element.
11390 		 */
11391 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11394 #endif /* LIBXML_VALID_ENABLED */
11395 
11396 		/*
11397 		 * Check for an Empty Element.
11398 		 */
11399 		if ((RAW == '/') && (NXT(1) == '>')) {
11400 		    SKIP(2);
11401 
11402 		    if (ctxt->sax2) {
11403 			if ((ctxt->sax != NULL) &&
11404 			    (ctxt->sax->endElementNs != NULL) &&
11405 			    (!ctxt->disableSAX))
11406 			    ctxt->sax->endElementNs(ctxt->userData, name,
11407 			                            prefix, URI);
11408 			if (ctxt->nsNr - nsNr > 0)
11409 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11410 #ifdef LIBXML_SAX1_ENABLED
11411 		    } else {
11412 			if ((ctxt->sax != NULL) &&
11413 			    (ctxt->sax->endElement != NULL) &&
11414 			    (!ctxt->disableSAX))
11415 			    ctxt->sax->endElement(ctxt->userData, name);
11416 #endif /* LIBXML_SAX1_ENABLED */
11417 		    }
11418 		    if (ctxt->instate == XML_PARSER_EOF)
11419 			goto done;
11420 		    spacePop(ctxt);
11421 		    if (ctxt->nameNr == 0) {
11422 			ctxt->instate = XML_PARSER_EPILOG;
11423 		    } else {
11424 			ctxt->instate = XML_PARSER_CONTENT;
11425 		    }
11426                     ctxt->progressive = 1;
11427 		    break;
11428 		}
11429 		if (RAW == '>') {
11430 		    NEXT;
11431 		} else {
11432 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11433 					 "Couldn't find end of Start Tag %s\n",
11434 					 name);
11435 		    nodePop(ctxt);
11436 		    spacePop(ctxt);
11437 		}
11438 		if (ctxt->sax2)
11439 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11440 #ifdef LIBXML_SAX1_ENABLED
11441 		else
11442 		    namePush(ctxt, name);
11443 #endif /* LIBXML_SAX1_ENABLED */
11444 
11445 		ctxt->instate = XML_PARSER_CONTENT;
11446                 ctxt->progressive = 1;
11447                 break;
11448 	    }
11449             case XML_PARSER_CONTENT: {
11450 		const xmlChar *test;
11451 		unsigned int cons;
11452 		if ((avail < 2) && (ctxt->inputNr == 1))
11453 		    goto done;
11454 		cur = ctxt->input->cur[0];
11455 		next = ctxt->input->cur[1];
11456 
11457 		test = CUR_PTR;
11458 	        cons = ctxt->input->consumed;
11459 		if ((cur == '<') && (next == '/')) {
11460 		    ctxt->instate = XML_PARSER_END_TAG;
11461 		    break;
11462 	        } else if ((cur == '<') && (next == '?')) {
11463 		    if ((!terminate) &&
11464 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11465                         ctxt->progressive = XML_PARSER_PI;
11466 			goto done;
11467                     }
11468 		    xmlParsePI(ctxt);
11469 		    ctxt->instate = XML_PARSER_CONTENT;
11470                     ctxt->progressive = 1;
11471 		} else if ((cur == '<') && (next != '!')) {
11472 		    ctxt->instate = XML_PARSER_START_TAG;
11473 		    break;
11474 		} else if ((cur == '<') && (next == '!') &&
11475 		           (ctxt->input->cur[2] == '-') &&
11476 			   (ctxt->input->cur[3] == '-')) {
11477 		    int term;
11478 
11479 	            if (avail < 4)
11480 		        goto done;
11481 		    ctxt->input->cur += 4;
11482 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483 		    ctxt->input->cur -= 4;
11484 		    if ((!terminate) && (term < 0)) {
11485                         ctxt->progressive = XML_PARSER_COMMENT;
11486 			goto done;
11487                     }
11488 		    xmlParseComment(ctxt);
11489 		    ctxt->instate = XML_PARSER_CONTENT;
11490                     ctxt->progressive = 1;
11491 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492 		    (ctxt->input->cur[2] == '[') &&
11493 		    (ctxt->input->cur[3] == 'C') &&
11494 		    (ctxt->input->cur[4] == 'D') &&
11495 		    (ctxt->input->cur[5] == 'A') &&
11496 		    (ctxt->input->cur[6] == 'T') &&
11497 		    (ctxt->input->cur[7] == 'A') &&
11498 		    (ctxt->input->cur[8] == '[')) {
11499 		    SKIP(9);
11500 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11501 		    break;
11502 		} else if ((cur == '<') && (next == '!') &&
11503 		           (avail < 9)) {
11504 		    goto done;
11505 		} else if (cur == '&') {
11506 		    if ((!terminate) &&
11507 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11508 			goto done;
11509 		    xmlParseReference(ctxt);
11510 		} else {
11511 		    /* TODO Avoid the extra copy, handle directly !!! */
11512 		    /*
11513 		     * Goal of the following test is:
11514 		     *  - minimize calls to the SAX 'character' callback
11515 		     *    when they are mergeable
11516 		     *  - handle an problem for isBlank when we only parse
11517 		     *    a sequence of blank chars and the next one is
11518 		     *    not available to check against '<' presence.
11519 		     *  - tries to homogenize the differences in SAX
11520 		     *    callbacks between the push and pull versions
11521 		     *    of the parser.
11522 		     */
11523 		    if ((ctxt->inputNr == 1) &&
11524 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11525 			if (!terminate) {
11526 			    if (ctxt->progressive) {
11527 				if ((lastlt == NULL) ||
11528 				    (ctxt->input->cur > lastlt))
11529 				    goto done;
11530 			    } else if (xmlParseLookupSequence(ctxt,
11531 			                                      '<', 0, 0) < 0) {
11532 				goto done;
11533 			    }
11534 			}
11535                     }
11536 		    ctxt->checkIndex = 0;
11537 		    xmlParseCharData(ctxt, 0);
11538 		}
11539 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11540 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11541 		                "detected an error in element content\n");
11542 		    xmlHaltParser(ctxt);
11543 		    break;
11544 		}
11545 		break;
11546 	    }
11547             case XML_PARSER_END_TAG:
11548 		if (avail < 2)
11549 		    goto done;
11550 		if (!terminate) {
11551 		    if (ctxt->progressive) {
11552 		        /* > can be found unescaped in attribute values */
11553 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11554 			    goto done;
11555 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11556 			goto done;
11557 		    }
11558 		}
11559 		if (ctxt->sax2) {
11560 		    xmlParseEndTag2(ctxt,
11561 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11562 		            (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11563 		            (int) (ptrdiff_t)
11564                                 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11565 		    nameNsPop(ctxt);
11566 		}
11567 #ifdef LIBXML_SAX1_ENABLED
11568 		  else
11569 		    xmlParseEndTag1(ctxt, 0);
11570 #endif /* LIBXML_SAX1_ENABLED */
11571 		if (ctxt->instate == XML_PARSER_EOF) {
11572 		    /* Nothing */
11573 		} else if (ctxt->nameNr == 0) {
11574 		    ctxt->instate = XML_PARSER_EPILOG;
11575 		} else {
11576 		    ctxt->instate = XML_PARSER_CONTENT;
11577 		}
11578 		break;
11579             case XML_PARSER_CDATA_SECTION: {
11580 	        /*
11581 		 * The Push mode need to have the SAX callback for
11582 		 * cdataBlock merge back contiguous callbacks.
11583 		 */
11584 		int base;
11585 
11586 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11587 		if (base < 0) {
11588 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11589 		        int tmp;
11590 
11591 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11592 			                        XML_PARSER_BIG_BUFFER_SIZE, 0);
11593 			if (tmp < 0) {
11594 			    tmp = -tmp;
11595 			    ctxt->input->cur += tmp;
11596 			    goto encoding_error;
11597 			}
11598 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11599 			    if (ctxt->sax->cdataBlock != NULL)
11600 				ctxt->sax->cdataBlock(ctxt->userData,
11601 				                      ctxt->input->cur, tmp);
11602 			    else if (ctxt->sax->characters != NULL)
11603 				ctxt->sax->characters(ctxt->userData,
11604 				                      ctxt->input->cur, tmp);
11605 			}
11606 			if (ctxt->instate == XML_PARSER_EOF)
11607 			    goto done;
11608 			SKIPL(tmp);
11609 			ctxt->checkIndex = 0;
11610 		    }
11611 		    goto done;
11612 		} else {
11613 		    int tmp;
11614 
11615 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11616 		    if ((tmp < 0) || (tmp != base)) {
11617 			tmp = -tmp;
11618 			ctxt->input->cur += tmp;
11619 			goto encoding_error;
11620 		    }
11621 		    if ((ctxt->sax != NULL) && (base == 0) &&
11622 		        (ctxt->sax->cdataBlock != NULL) &&
11623 		        (!ctxt->disableSAX)) {
11624 			/*
11625 			 * Special case to provide identical behaviour
11626 			 * between pull and push parsers on enpty CDATA
11627 			 * sections
11628 			 */
11629 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11630 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11631 			               "<![CDATA[", 9)))
11632 			     ctxt->sax->cdataBlock(ctxt->userData,
11633 			                           BAD_CAST "", 0);
11634 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11635 			(!ctxt->disableSAX)) {
11636 			if (ctxt->sax->cdataBlock != NULL)
11637 			    ctxt->sax->cdataBlock(ctxt->userData,
11638 						  ctxt->input->cur, base);
11639 			else if (ctxt->sax->characters != NULL)
11640 			    ctxt->sax->characters(ctxt->userData,
11641 						  ctxt->input->cur, base);
11642 		    }
11643 		    if (ctxt->instate == XML_PARSER_EOF)
11644 			goto done;
11645 		    SKIPL(base + 3);
11646 		    ctxt->checkIndex = 0;
11647 		    ctxt->instate = XML_PARSER_CONTENT;
11648 #ifdef DEBUG_PUSH
11649 		    xmlGenericError(xmlGenericErrorContext,
11650 			    "PP: entering CONTENT\n");
11651 #endif
11652 		}
11653 		break;
11654 	    }
11655             case XML_PARSER_MISC:
11656 		SKIP_BLANKS;
11657 		if (ctxt->input->buf == NULL)
11658 		    avail = ctxt->input->length -
11659 		            (ctxt->input->cur - ctxt->input->base);
11660 		else
11661 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11662 		            (ctxt->input->cur - ctxt->input->base);
11663 		if (avail < 2)
11664 		    goto done;
11665 		cur = ctxt->input->cur[0];
11666 		next = ctxt->input->cur[1];
11667 	        if ((cur == '<') && (next == '?')) {
11668 		    if ((!terminate) &&
11669 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11670                         ctxt->progressive = XML_PARSER_PI;
11671 			goto done;
11672                     }
11673 #ifdef DEBUG_PUSH
11674 		    xmlGenericError(xmlGenericErrorContext,
11675 			    "PP: Parsing PI\n");
11676 #endif
11677 		    xmlParsePI(ctxt);
11678 		    if (ctxt->instate == XML_PARSER_EOF)
11679 			goto done;
11680 		    ctxt->instate = XML_PARSER_MISC;
11681                     ctxt->progressive = 1;
11682 		    ctxt->checkIndex = 0;
11683 		} else if ((cur == '<') && (next == '!') &&
11684 		    (ctxt->input->cur[2] == '-') &&
11685 		    (ctxt->input->cur[3] == '-')) {
11686 		    if ((!terminate) &&
11687 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11688                         ctxt->progressive = XML_PARSER_COMMENT;
11689 			goto done;
11690                     }
11691 #ifdef DEBUG_PUSH
11692 		    xmlGenericError(xmlGenericErrorContext,
11693 			    "PP: Parsing Comment\n");
11694 #endif
11695 		    xmlParseComment(ctxt);
11696 		    if (ctxt->instate == XML_PARSER_EOF)
11697 			goto done;
11698 		    ctxt->instate = XML_PARSER_MISC;
11699                     ctxt->progressive = 1;
11700 		    ctxt->checkIndex = 0;
11701 		} else if ((cur == '<') && (next == '!') &&
11702 		    (ctxt->input->cur[2] == 'D') &&
11703 		    (ctxt->input->cur[3] == 'O') &&
11704 		    (ctxt->input->cur[4] == 'C') &&
11705 		    (ctxt->input->cur[5] == 'T') &&
11706 		    (ctxt->input->cur[6] == 'Y') &&
11707 		    (ctxt->input->cur[7] == 'P') &&
11708 		    (ctxt->input->cur[8] == 'E')) {
11709 		    if ((!terminate) &&
11710 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11711                         ctxt->progressive = XML_PARSER_DTD;
11712 			goto done;
11713                     }
11714 #ifdef DEBUG_PUSH
11715 		    xmlGenericError(xmlGenericErrorContext,
11716 			    "PP: Parsing internal subset\n");
11717 #endif
11718 		    ctxt->inSubset = 1;
11719                     ctxt->progressive = 0;
11720 		    ctxt->checkIndex = 0;
11721 		    xmlParseDocTypeDecl(ctxt);
11722 		    if (ctxt->instate == XML_PARSER_EOF)
11723 			goto done;
11724 		    if (RAW == '[') {
11725 			ctxt->instate = XML_PARSER_DTD;
11726 #ifdef DEBUG_PUSH
11727 			xmlGenericError(xmlGenericErrorContext,
11728 				"PP: entering DTD\n");
11729 #endif
11730 		    } else {
11731 			/*
11732 			 * Create and update the external subset.
11733 			 */
11734 			ctxt->inSubset = 2;
11735 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11736 			    (ctxt->sax->externalSubset != NULL))
11737 			    ctxt->sax->externalSubset(ctxt->userData,
11738 				    ctxt->intSubName, ctxt->extSubSystem,
11739 				    ctxt->extSubURI);
11740 			ctxt->inSubset = 0;
11741 			xmlCleanSpecialAttr(ctxt);
11742 			ctxt->instate = XML_PARSER_PROLOG;
11743 #ifdef DEBUG_PUSH
11744 			xmlGenericError(xmlGenericErrorContext,
11745 				"PP: entering PROLOG\n");
11746 #endif
11747 		    }
11748 		} else if ((cur == '<') && (next == '!') &&
11749 		           (avail < 9)) {
11750 		    goto done;
11751 		} else {
11752 		    ctxt->instate = XML_PARSER_START_TAG;
11753 		    ctxt->progressive = XML_PARSER_START_TAG;
11754 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11755 #ifdef DEBUG_PUSH
11756 		    xmlGenericError(xmlGenericErrorContext,
11757 			    "PP: entering START_TAG\n");
11758 #endif
11759 		}
11760 		break;
11761             case XML_PARSER_PROLOG:
11762 		SKIP_BLANKS;
11763 		if (ctxt->input->buf == NULL)
11764 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11765 		else
11766 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11767                             (ctxt->input->cur - ctxt->input->base);
11768 		if (avail < 2)
11769 		    goto done;
11770 		cur = ctxt->input->cur[0];
11771 		next = ctxt->input->cur[1];
11772 	        if ((cur == '<') && (next == '?')) {
11773 		    if ((!terminate) &&
11774 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11775                         ctxt->progressive = XML_PARSER_PI;
11776 			goto done;
11777                     }
11778 #ifdef DEBUG_PUSH
11779 		    xmlGenericError(xmlGenericErrorContext,
11780 			    "PP: Parsing PI\n");
11781 #endif
11782 		    xmlParsePI(ctxt);
11783 		    if (ctxt->instate == XML_PARSER_EOF)
11784 			goto done;
11785 		    ctxt->instate = XML_PARSER_PROLOG;
11786                     ctxt->progressive = 1;
11787 		} else if ((cur == '<') && (next == '!') &&
11788 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11789 		    if ((!terminate) &&
11790 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11791                         ctxt->progressive = XML_PARSER_COMMENT;
11792 			goto done;
11793                     }
11794 #ifdef DEBUG_PUSH
11795 		    xmlGenericError(xmlGenericErrorContext,
11796 			    "PP: Parsing Comment\n");
11797 #endif
11798 		    xmlParseComment(ctxt);
11799 		    if (ctxt->instate == XML_PARSER_EOF)
11800 			goto done;
11801 		    ctxt->instate = XML_PARSER_PROLOG;
11802                     ctxt->progressive = 1;
11803 		} else if ((cur == '<') && (next == '!') &&
11804 		           (avail < 4)) {
11805 		    goto done;
11806 		} else {
11807 		    ctxt->instate = XML_PARSER_START_TAG;
11808 		    if (ctxt->progressive == 0)
11809 			ctxt->progressive = XML_PARSER_START_TAG;
11810 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11811 #ifdef DEBUG_PUSH
11812 		    xmlGenericError(xmlGenericErrorContext,
11813 			    "PP: entering START_TAG\n");
11814 #endif
11815 		}
11816 		break;
11817             case XML_PARSER_EPILOG:
11818 		SKIP_BLANKS;
11819 		if (ctxt->input->buf == NULL)
11820 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11821 		else
11822 		    avail = xmlBufUse(ctxt->input->buf->buffer) -
11823                             (ctxt->input->cur - ctxt->input->base);
11824 		if (avail < 2)
11825 		    goto done;
11826 		cur = ctxt->input->cur[0];
11827 		next = ctxt->input->cur[1];
11828 	        if ((cur == '<') && (next == '?')) {
11829 		    if ((!terminate) &&
11830 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11831                         ctxt->progressive = XML_PARSER_PI;
11832 			goto done;
11833                     }
11834 #ifdef DEBUG_PUSH
11835 		    xmlGenericError(xmlGenericErrorContext,
11836 			    "PP: Parsing PI\n");
11837 #endif
11838 		    xmlParsePI(ctxt);
11839 		    if (ctxt->instate == XML_PARSER_EOF)
11840 			goto done;
11841 		    ctxt->instate = XML_PARSER_EPILOG;
11842                     ctxt->progressive = 1;
11843 		} else if ((cur == '<') && (next == '!') &&
11844 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11845 		    if ((!terminate) &&
11846 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11847                         ctxt->progressive = XML_PARSER_COMMENT;
11848 			goto done;
11849                     }
11850 #ifdef DEBUG_PUSH
11851 		    xmlGenericError(xmlGenericErrorContext,
11852 			    "PP: Parsing Comment\n");
11853 #endif
11854 		    xmlParseComment(ctxt);
11855 		    if (ctxt->instate == XML_PARSER_EOF)
11856 			goto done;
11857 		    ctxt->instate = XML_PARSER_EPILOG;
11858                     ctxt->progressive = 1;
11859 		} else if ((cur == '<') && (next == '!') &&
11860 		           (avail < 4)) {
11861 		    goto done;
11862 		} else {
11863 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11864 		    xmlHaltParser(ctxt);
11865 #ifdef DEBUG_PUSH
11866 		    xmlGenericError(xmlGenericErrorContext,
11867 			    "PP: entering EOF\n");
11868 #endif
11869 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11870 			ctxt->sax->endDocument(ctxt->userData);
11871 		    goto done;
11872 		}
11873 		break;
11874             case XML_PARSER_DTD: {
11875 	        /*
11876 		 * Sorry but progressive parsing of the internal subset
11877 		 * is not expected to be supported. We first check that
11878 		 * the full content of the internal subset is available and
11879 		 * the parsing is launched only at that point.
11880 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11881 		 * section and not in a ']]>' sequence which are conditional
11882 		 * sections (whoever argued to keep that crap in XML deserve
11883 		 * a place in hell !).
11884 		 */
11885 		int base, i;
11886 		xmlChar *buf;
11887 	        xmlChar quote = 0;
11888                 size_t use;
11889 
11890 		base = ctxt->input->cur - ctxt->input->base;
11891 		if (base < 0) return(0);
11892 		if (ctxt->checkIndex > base)
11893 		    base = ctxt->checkIndex;
11894 		buf = xmlBufContent(ctxt->input->buf->buffer);
11895                 use = xmlBufUse(ctxt->input->buf->buffer);
11896 		for (;(unsigned int) base < use; base++) {
11897 		    if (quote != 0) {
11898 		        if (buf[base] == quote)
11899 			    quote = 0;
11900 			continue;
11901 		    }
11902 		    if ((quote == 0) && (buf[base] == '<')) {
11903 		        int found  = 0;
11904 			/* special handling of comments */
11905 		        if (((unsigned int) base + 4 < use) &&
11906 			    (buf[base + 1] == '!') &&
11907 			    (buf[base + 2] == '-') &&
11908 			    (buf[base + 3] == '-')) {
11909 			    for (;(unsigned int) base + 3 < use; base++) {
11910 				if ((buf[base] == '-') &&
11911 				    (buf[base + 1] == '-') &&
11912 				    (buf[base + 2] == '>')) {
11913 				    found = 1;
11914 				    base += 2;
11915 				    break;
11916 				}
11917 		            }
11918 			    if (!found) {
11919 #if 0
11920 			        fprintf(stderr, "unfinished comment\n");
11921 #endif
11922 			        break; /* for */
11923 		            }
11924 		            continue;
11925 			}
11926 		    }
11927 		    if (buf[base] == '"') {
11928 		        quote = '"';
11929 			continue;
11930 		    }
11931 		    if (buf[base] == '\'') {
11932 		        quote = '\'';
11933 			continue;
11934 		    }
11935 		    if (buf[base] == ']') {
11936 #if 0
11937 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11938 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11939 #endif
11940 		        if ((unsigned int) base +1 >= use)
11941 			    break;
11942 			if (buf[base + 1] == ']') {
11943 			    /* conditional crap, skip both ']' ! */
11944 			    base++;
11945 			    continue;
11946 			}
11947 		        for (i = 1; (unsigned int) base + i < use; i++) {
11948 			    if (buf[base + i] == '>') {
11949 #if 0
11950 			        fprintf(stderr, "found\n");
11951 #endif
11952 			        goto found_end_int_subset;
11953 			    }
11954 			    if (!IS_BLANK_CH(buf[base + i])) {
11955 #if 0
11956 			        fprintf(stderr, "not found\n");
11957 #endif
11958 			        goto not_end_of_int_subset;
11959 			    }
11960 			}
11961 #if 0
11962 			fprintf(stderr, "end of stream\n");
11963 #endif
11964 		        break;
11965 
11966 		    }
11967 not_end_of_int_subset:
11968                     continue; /* for */
11969 		}
11970 		/*
11971 		 * We didn't found the end of the Internal subset
11972 		 */
11973                 if (quote == 0)
11974                     ctxt->checkIndex = base;
11975                 else
11976                     ctxt->checkIndex = 0;
11977 #ifdef DEBUG_PUSH
11978 		if (next == 0)
11979 		    xmlGenericError(xmlGenericErrorContext,
11980 			    "PP: lookup of int subset end filed\n");
11981 #endif
11982 	        goto done;
11983 
11984 found_end_int_subset:
11985                 ctxt->checkIndex = 0;
11986 		xmlParseInternalSubset(ctxt);
11987 		if (ctxt->instate == XML_PARSER_EOF)
11988 		    goto done;
11989 		ctxt->inSubset = 2;
11990 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11991 		    (ctxt->sax->externalSubset != NULL))
11992 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11993 			    ctxt->extSubSystem, ctxt->extSubURI);
11994 		ctxt->inSubset = 0;
11995 		xmlCleanSpecialAttr(ctxt);
11996 		if (ctxt->instate == XML_PARSER_EOF)
11997 		    goto done;
11998 		ctxt->instate = XML_PARSER_PROLOG;
11999 		ctxt->checkIndex = 0;
12000 #ifdef DEBUG_PUSH
12001 		xmlGenericError(xmlGenericErrorContext,
12002 			"PP: entering PROLOG\n");
12003 #endif
12004                 break;
12005 	    }
12006             case XML_PARSER_COMMENT:
12007 		xmlGenericError(xmlGenericErrorContext,
12008 			"PP: internal error, state == COMMENT\n");
12009 		ctxt->instate = XML_PARSER_CONTENT;
12010 #ifdef DEBUG_PUSH
12011 		xmlGenericError(xmlGenericErrorContext,
12012 			"PP: entering CONTENT\n");
12013 #endif
12014 		break;
12015             case XML_PARSER_IGNORE:
12016 		xmlGenericError(xmlGenericErrorContext,
12017 			"PP: internal error, state == IGNORE");
12018 	        ctxt->instate = XML_PARSER_DTD;
12019 #ifdef DEBUG_PUSH
12020 		xmlGenericError(xmlGenericErrorContext,
12021 			"PP: entering DTD\n");
12022 #endif
12023 	        break;
12024             case XML_PARSER_PI:
12025 		xmlGenericError(xmlGenericErrorContext,
12026 			"PP: internal error, state == PI\n");
12027 		ctxt->instate = XML_PARSER_CONTENT;
12028 #ifdef DEBUG_PUSH
12029 		xmlGenericError(xmlGenericErrorContext,
12030 			"PP: entering CONTENT\n");
12031 #endif
12032 		break;
12033             case XML_PARSER_ENTITY_DECL:
12034 		xmlGenericError(xmlGenericErrorContext,
12035 			"PP: internal error, state == ENTITY_DECL\n");
12036 		ctxt->instate = XML_PARSER_DTD;
12037 #ifdef DEBUG_PUSH
12038 		xmlGenericError(xmlGenericErrorContext,
12039 			"PP: entering DTD\n");
12040 #endif
12041 		break;
12042             case XML_PARSER_ENTITY_VALUE:
12043 		xmlGenericError(xmlGenericErrorContext,
12044 			"PP: internal error, state == ENTITY_VALUE\n");
12045 		ctxt->instate = XML_PARSER_CONTENT;
12046 #ifdef DEBUG_PUSH
12047 		xmlGenericError(xmlGenericErrorContext,
12048 			"PP: entering DTD\n");
12049 #endif
12050 		break;
12051             case XML_PARSER_ATTRIBUTE_VALUE:
12052 		xmlGenericError(xmlGenericErrorContext,
12053 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
12054 		ctxt->instate = XML_PARSER_START_TAG;
12055 #ifdef DEBUG_PUSH
12056 		xmlGenericError(xmlGenericErrorContext,
12057 			"PP: entering START_TAG\n");
12058 #endif
12059 		break;
12060             case XML_PARSER_SYSTEM_LITERAL:
12061 		xmlGenericError(xmlGenericErrorContext,
12062 			"PP: internal error, state == SYSTEM_LITERAL\n");
12063 		ctxt->instate = XML_PARSER_START_TAG;
12064 #ifdef DEBUG_PUSH
12065 		xmlGenericError(xmlGenericErrorContext,
12066 			"PP: entering START_TAG\n");
12067 #endif
12068 		break;
12069             case XML_PARSER_PUBLIC_LITERAL:
12070 		xmlGenericError(xmlGenericErrorContext,
12071 			"PP: internal error, state == PUBLIC_LITERAL\n");
12072 		ctxt->instate = XML_PARSER_START_TAG;
12073 #ifdef DEBUG_PUSH
12074 		xmlGenericError(xmlGenericErrorContext,
12075 			"PP: entering START_TAG\n");
12076 #endif
12077 		break;
12078 	}
12079     }
12080 done:
12081 #ifdef DEBUG_PUSH
12082     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12083 #endif
12084     return(ret);
12085 encoding_error:
12086     {
12087         char buffer[150];
12088 
12089 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12090 			ctxt->input->cur[0], ctxt->input->cur[1],
12091 			ctxt->input->cur[2], ctxt->input->cur[3]);
12092 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12093 		     "Input is not proper UTF-8, indicate encoding !\n%s",
12094 		     BAD_CAST buffer, NULL);
12095     }
12096     return(0);
12097 }
12098 
12099 /**
12100  * xmlParseCheckTransition:
12101  * @ctxt:  an XML parser context
12102  * @chunk:  a char array
12103  * @size:  the size in byte of the chunk
12104  *
12105  * Check depending on the current parser state if the chunk given must be
12106  * processed immediately or one need more data to advance on parsing.
12107  *
12108  * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12109  */
12110 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12111 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12112     if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12113         return(-1);
12114     if (ctxt->instate == XML_PARSER_START_TAG) {
12115         if (memchr(chunk, '>', size) != NULL)
12116             return(1);
12117         return(0);
12118     }
12119     if (ctxt->progressive == XML_PARSER_COMMENT) {
12120         if (memchr(chunk, '>', size) != NULL)
12121             return(1);
12122         return(0);
12123     }
12124     if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12125         if (memchr(chunk, '>', size) != NULL)
12126             return(1);
12127         return(0);
12128     }
12129     if (ctxt->progressive == XML_PARSER_PI) {
12130         if (memchr(chunk, '>', size) != NULL)
12131             return(1);
12132         return(0);
12133     }
12134     if (ctxt->instate == XML_PARSER_END_TAG) {
12135         if (memchr(chunk, '>', size) != NULL)
12136             return(1);
12137         return(0);
12138     }
12139     if ((ctxt->progressive == XML_PARSER_DTD) ||
12140         (ctxt->instate == XML_PARSER_DTD)) {
12141         if (memchr(chunk, '>', size) != NULL)
12142             return(1);
12143         return(0);
12144     }
12145     return(1);
12146 }
12147 
12148 /**
12149  * xmlParseChunk:
12150  * @ctxt:  an XML parser context
12151  * @chunk:  a char array
12152  * @size:  the size in bytes of the chunk
12153  * @terminate:  last chunk indicator
12154  *
12155  * Parse a Chunk of memory
12156  *
12157  * Returns zero if no error, the xmlParserErrors otherwise.
12158  */
12159 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12160 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12161               int terminate) {
12162     int end_in_lf = 0;
12163     int remain = 0;
12164     size_t old_avail = 0;
12165     size_t avail = 0;
12166 
12167     if (ctxt == NULL)
12168         return(XML_ERR_INTERNAL_ERROR);
12169     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12170         return(ctxt->errNo);
12171     if (ctxt->instate == XML_PARSER_EOF)
12172         return(-1);
12173     if (ctxt->instate == XML_PARSER_START)
12174         xmlDetectSAX2(ctxt);
12175     if ((size > 0) && (chunk != NULL) && (!terminate) &&
12176         (chunk[size - 1] == '\r')) {
12177 	end_in_lf = 1;
12178 	size--;
12179     }
12180 
12181 xmldecl_done:
12182 
12183     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12184         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12185 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12186 	size_t cur = ctxt->input->cur - ctxt->input->base;
12187 	int res;
12188 
12189         old_avail = xmlBufUse(ctxt->input->buf->buffer);
12190         /*
12191          * Specific handling if we autodetected an encoding, we should not
12192          * push more than the first line ... which depend on the encoding
12193          * And only push the rest once the final encoding was detected
12194          */
12195         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12196             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12197             unsigned int len = 45;
12198 
12199             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12200                                BAD_CAST "UTF-16")) ||
12201                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12202                                BAD_CAST "UTF16")))
12203                 len = 90;
12204             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12205                                     BAD_CAST "UCS-4")) ||
12206                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12207                                     BAD_CAST "UCS4")))
12208                 len = 180;
12209 
12210             if (ctxt->input->buf->rawconsumed < len)
12211                 len -= ctxt->input->buf->rawconsumed;
12212 
12213             /*
12214              * Change size for reading the initial declaration only
12215              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12216              * will blindly copy extra bytes from memory.
12217              */
12218             if ((unsigned int) size > len) {
12219                 remain = size - len;
12220                 size = len;
12221             } else {
12222                 remain = 0;
12223             }
12224         }
12225 	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12226 	if (res < 0) {
12227 	    ctxt->errNo = XML_PARSER_EOF;
12228 	    xmlHaltParser(ctxt);
12229 	    return (XML_PARSER_EOF);
12230 	}
12231         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12232 #ifdef DEBUG_PUSH
12233 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12234 #endif
12235 
12236     } else if (ctxt->instate != XML_PARSER_EOF) {
12237 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12238 	    xmlParserInputBufferPtr in = ctxt->input->buf;
12239 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
12240 		    (in->raw != NULL)) {
12241 		int nbchars;
12242 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12243 		size_t current = ctxt->input->cur - ctxt->input->base;
12244 
12245 		nbchars = xmlCharEncInput(in, terminate);
12246 		if (nbchars < 0) {
12247 		    /* TODO 2.6.0 */
12248 		    xmlGenericError(xmlGenericErrorContext,
12249 				    "xmlParseChunk: encoder error\n");
12250                     xmlHaltParser(ctxt);
12251 		    return(XML_ERR_INVALID_ENCODING);
12252 		}
12253 		xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12254 	    }
12255 	}
12256     }
12257     if (remain != 0) {
12258         xmlParseTryOrFinish(ctxt, 0);
12259     } else {
12260         if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12261             avail = xmlBufUse(ctxt->input->buf->buffer);
12262         /*
12263          * Depending on the current state it may not be such
12264          * a good idea to try parsing if there is nothing in the chunk
12265          * which would be worth doing a parser state transition and we
12266          * need to wait for more data
12267          */
12268         if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12269             (old_avail == 0) || (avail == 0) ||
12270             (xmlParseCheckTransition(ctxt,
12271                        (const char *)&ctxt->input->base[old_avail],
12272                                      avail - old_avail)))
12273             xmlParseTryOrFinish(ctxt, terminate);
12274     }
12275     if (ctxt->instate == XML_PARSER_EOF)
12276         return(ctxt->errNo);
12277 
12278     if ((ctxt->input != NULL) &&
12279          (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12280          ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12281         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12282         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12283         xmlHaltParser(ctxt);
12284     }
12285     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12286         return(ctxt->errNo);
12287 
12288     if (remain != 0) {
12289         chunk += size;
12290         size = remain;
12291         remain = 0;
12292         goto xmldecl_done;
12293     }
12294     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12295         (ctxt->input->buf != NULL)) {
12296 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12297 					 ctxt->input);
12298 	size_t current = ctxt->input->cur - ctxt->input->base;
12299 
12300 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12301 
12302 	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12303 			      base, current);
12304     }
12305     if (terminate) {
12306 	/*
12307 	 * Check for termination
12308 	 */
12309 	int cur_avail = 0;
12310 
12311 	if (ctxt->input != NULL) {
12312 	    if (ctxt->input->buf == NULL)
12313 		cur_avail = ctxt->input->length -
12314 			    (ctxt->input->cur - ctxt->input->base);
12315 	    else
12316 		cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12317 			              (ctxt->input->cur - ctxt->input->base);
12318 	}
12319 
12320 	if ((ctxt->instate != XML_PARSER_EOF) &&
12321 	    (ctxt->instate != XML_PARSER_EPILOG)) {
12322 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12323 	}
12324 	if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12325 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12326 	}
12327 	if (ctxt->instate != XML_PARSER_EOF) {
12328 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12329 		ctxt->sax->endDocument(ctxt->userData);
12330 	}
12331 	ctxt->instate = XML_PARSER_EOF;
12332     }
12333     if (ctxt->wellFormed == 0)
12334 	return((xmlParserErrors) ctxt->errNo);
12335     else
12336         return(0);
12337 }
12338 
12339 /************************************************************************
12340  *									*
12341  *		I/O front end functions to the parser			*
12342  *									*
12343  ************************************************************************/
12344 
12345 /**
12346  * xmlCreatePushParserCtxt:
12347  * @sax:  a SAX handler
12348  * @user_data:  The user data returned on SAX callbacks
12349  * @chunk:  a pointer to an array of chars
12350  * @size:  number of chars in the array
12351  * @filename:  an optional file name or URI
12352  *
12353  * Create a parser context for using the XML parser in push mode.
12354  * If @buffer and @size are non-NULL, the data is used to detect
12355  * the encoding.  The remaining characters will be parsed so they
12356  * don't need to be fed in again through xmlParseChunk.
12357  * To allow content encoding detection, @size should be >= 4
12358  * The value of @filename is used for fetching external entities
12359  * and error/warning reports.
12360  *
12361  * Returns the new parser context or NULL
12362  */
12363 
12364 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12365 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12366                         const char *chunk, int size, const char *filename) {
12367     xmlParserCtxtPtr ctxt;
12368     xmlParserInputPtr inputStream;
12369     xmlParserInputBufferPtr buf;
12370     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12371 
12372     /*
12373      * plug some encoding conversion routines
12374      */
12375     if ((chunk != NULL) && (size >= 4))
12376 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12377 
12378     buf = xmlAllocParserInputBuffer(enc);
12379     if (buf == NULL) return(NULL);
12380 
12381     ctxt = xmlNewParserCtxt();
12382     if (ctxt == NULL) {
12383         xmlErrMemory(NULL, "creating parser: out of memory\n");
12384 	xmlFreeParserInputBuffer(buf);
12385 	return(NULL);
12386     }
12387     ctxt->dictNames = 1;
12388     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12389     if (ctxt->pushTab == NULL) {
12390         xmlErrMemory(ctxt, NULL);
12391 	xmlFreeParserInputBuffer(buf);
12392 	xmlFreeParserCtxt(ctxt);
12393 	return(NULL);
12394     }
12395     if (sax != NULL) {
12396 #ifdef LIBXML_SAX1_ENABLED
12397 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12398 #endif /* LIBXML_SAX1_ENABLED */
12399 	    xmlFree(ctxt->sax);
12400 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12401 	if (ctxt->sax == NULL) {
12402 	    xmlErrMemory(ctxt, NULL);
12403 	    xmlFreeParserInputBuffer(buf);
12404 	    xmlFreeParserCtxt(ctxt);
12405 	    return(NULL);
12406 	}
12407 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12408 	if (sax->initialized == XML_SAX2_MAGIC)
12409 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12410 	else
12411 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12412 	if (user_data != NULL)
12413 	    ctxt->userData = user_data;
12414     }
12415     if (filename == NULL) {
12416 	ctxt->directory = NULL;
12417     } else {
12418         ctxt->directory = xmlParserGetDirectory(filename);
12419     }
12420 
12421     inputStream = xmlNewInputStream(ctxt);
12422     if (inputStream == NULL) {
12423 	xmlFreeParserCtxt(ctxt);
12424 	xmlFreeParserInputBuffer(buf);
12425 	return(NULL);
12426     }
12427 
12428     if (filename == NULL)
12429 	inputStream->filename = NULL;
12430     else {
12431 	inputStream->filename = (char *)
12432 	    xmlCanonicPath((const xmlChar *) filename);
12433 	if (inputStream->filename == NULL) {
12434 	    xmlFreeParserCtxt(ctxt);
12435 	    xmlFreeParserInputBuffer(buf);
12436 	    return(NULL);
12437 	}
12438     }
12439     inputStream->buf = buf;
12440     xmlBufResetInput(inputStream->buf->buffer, inputStream);
12441     inputPush(ctxt, inputStream);
12442 
12443     /*
12444      * If the caller didn't provide an initial 'chunk' for determining
12445      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12446      * that it can be automatically determined later
12447      */
12448     if ((size == 0) || (chunk == NULL)) {
12449 	ctxt->charset = XML_CHAR_ENCODING_NONE;
12450     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12451 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12452 	size_t cur = ctxt->input->cur - ctxt->input->base;
12453 
12454 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12455 
12456         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12457 #ifdef DEBUG_PUSH
12458 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12459 #endif
12460     }
12461 
12462     if (enc != XML_CHAR_ENCODING_NONE) {
12463         xmlSwitchEncoding(ctxt, enc);
12464     }
12465 
12466     return(ctxt);
12467 }
12468 #endif /* LIBXML_PUSH_ENABLED */
12469 
12470 /**
12471  * xmlHaltParser:
12472  * @ctxt:  an XML parser context
12473  *
12474  * Blocks further parser processing don't override error
12475  * for internal use
12476  */
12477 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12478 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12479     if (ctxt == NULL)
12480         return;
12481     ctxt->instate = XML_PARSER_EOF;
12482     ctxt->disableSAX = 1;
12483     while (ctxt->inputNr > 1)
12484         xmlFreeInputStream(inputPop(ctxt));
12485     if (ctxt->input != NULL) {
12486         /*
12487 	 * in case there was a specific allocation deallocate before
12488 	 * overriding base
12489 	 */
12490         if (ctxt->input->free != NULL) {
12491 	    ctxt->input->free((xmlChar *) ctxt->input->base);
12492 	    ctxt->input->free = NULL;
12493 	}
12494         if (ctxt->input->buf != NULL) {
12495             xmlFreeParserInputBuffer(ctxt->input->buf);
12496             ctxt->input->buf = NULL;
12497         }
12498 	ctxt->input->cur = BAD_CAST"";
12499         ctxt->input->length = 0;
12500 	ctxt->input->base = ctxt->input->cur;
12501         ctxt->input->end = ctxt->input->cur;
12502     }
12503 }
12504 
12505 /**
12506  * xmlStopParser:
12507  * @ctxt:  an XML parser context
12508  *
12509  * Blocks further parser processing
12510  */
12511 void
xmlStopParser(xmlParserCtxtPtr ctxt)12512 xmlStopParser(xmlParserCtxtPtr ctxt) {
12513     if (ctxt == NULL)
12514         return;
12515     xmlHaltParser(ctxt);
12516     ctxt->errNo = XML_ERR_USER_STOP;
12517 }
12518 
12519 /**
12520  * xmlCreateIOParserCtxt:
12521  * @sax:  a SAX handler
12522  * @user_data:  The user data returned on SAX callbacks
12523  * @ioread:  an I/O read function
12524  * @ioclose:  an I/O close function
12525  * @ioctx:  an I/O handler
12526  * @enc:  the charset encoding if known
12527  *
12528  * Create a parser context for using the XML parser with an existing
12529  * I/O stream
12530  *
12531  * Returns the new parser context or NULL
12532  */
12533 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12534 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12535 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12536 	void *ioctx, xmlCharEncoding enc) {
12537     xmlParserCtxtPtr ctxt;
12538     xmlParserInputPtr inputStream;
12539     xmlParserInputBufferPtr buf;
12540 
12541     if (ioread == NULL) return(NULL);
12542 
12543     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12544     if (buf == NULL) {
12545         if (ioclose != NULL)
12546             ioclose(ioctx);
12547         return (NULL);
12548     }
12549 
12550     ctxt = xmlNewParserCtxt();
12551     if (ctxt == NULL) {
12552 	xmlFreeParserInputBuffer(buf);
12553 	return(NULL);
12554     }
12555     if (sax != NULL) {
12556 #ifdef LIBXML_SAX1_ENABLED
12557 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12558 #endif /* LIBXML_SAX1_ENABLED */
12559 	    xmlFree(ctxt->sax);
12560 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12561 	if (ctxt->sax == NULL) {
12562 	    xmlErrMemory(ctxt, NULL);
12563 	    xmlFreeParserCtxt(ctxt);
12564 	    return(NULL);
12565 	}
12566 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12567 	if (sax->initialized == XML_SAX2_MAGIC)
12568 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12569 	else
12570 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12571 	if (user_data != NULL)
12572 	    ctxt->userData = user_data;
12573     }
12574 
12575     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12576     if (inputStream == NULL) {
12577 	xmlFreeParserCtxt(ctxt);
12578 	return(NULL);
12579     }
12580     inputPush(ctxt, inputStream);
12581 
12582     return(ctxt);
12583 }
12584 
12585 #ifdef LIBXML_VALID_ENABLED
12586 /************************************************************************
12587  *									*
12588  *		Front ends when parsing a DTD				*
12589  *									*
12590  ************************************************************************/
12591 
12592 /**
12593  * xmlIOParseDTD:
12594  * @sax:  the SAX handler block or NULL
12595  * @input:  an Input Buffer
12596  * @enc:  the charset encoding if known
12597  *
12598  * Load and parse a DTD
12599  *
12600  * Returns the resulting xmlDtdPtr or NULL in case of error.
12601  * @input will be freed by the function in any case.
12602  */
12603 
12604 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12605 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12606 	      xmlCharEncoding enc) {
12607     xmlDtdPtr ret = NULL;
12608     xmlParserCtxtPtr ctxt;
12609     xmlParserInputPtr pinput = NULL;
12610     xmlChar start[4];
12611 
12612     if (input == NULL)
12613 	return(NULL);
12614 
12615     ctxt = xmlNewParserCtxt();
12616     if (ctxt == NULL) {
12617         xmlFreeParserInputBuffer(input);
12618 	return(NULL);
12619     }
12620 
12621     /* We are loading a DTD */
12622     ctxt->options |= XML_PARSE_DTDLOAD;
12623 
12624     /*
12625      * Set-up the SAX context
12626      */
12627     if (sax != NULL) {
12628 	if (ctxt->sax != NULL)
12629 	    xmlFree(ctxt->sax);
12630         ctxt->sax = sax;
12631         ctxt->userData = ctxt;
12632     }
12633     xmlDetectSAX2(ctxt);
12634 
12635     /*
12636      * generate a parser input from the I/O handler
12637      */
12638 
12639     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12640     if (pinput == NULL) {
12641         if (sax != NULL) ctxt->sax = NULL;
12642         xmlFreeParserInputBuffer(input);
12643 	xmlFreeParserCtxt(ctxt);
12644 	return(NULL);
12645     }
12646 
12647     /*
12648      * plug some encoding conversion routines here.
12649      */
12650     if (xmlPushInput(ctxt, pinput) < 0) {
12651         if (sax != NULL) ctxt->sax = NULL;
12652 	xmlFreeParserCtxt(ctxt);
12653 	return(NULL);
12654     }
12655     if (enc != XML_CHAR_ENCODING_NONE) {
12656         xmlSwitchEncoding(ctxt, enc);
12657     }
12658 
12659     pinput->filename = NULL;
12660     pinput->line = 1;
12661     pinput->col = 1;
12662     pinput->base = ctxt->input->cur;
12663     pinput->cur = ctxt->input->cur;
12664     pinput->free = NULL;
12665 
12666     /*
12667      * let's parse that entity knowing it's an external subset.
12668      */
12669     ctxt->inSubset = 2;
12670     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12671     if (ctxt->myDoc == NULL) {
12672 	xmlErrMemory(ctxt, "New Doc failed");
12673 	return(NULL);
12674     }
12675     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12676     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12677 	                               BAD_CAST "none", BAD_CAST "none");
12678 
12679     if ((enc == XML_CHAR_ENCODING_NONE) &&
12680         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12681 	/*
12682 	 * Get the 4 first bytes and decode the charset
12683 	 * if enc != XML_CHAR_ENCODING_NONE
12684 	 * plug some encoding conversion routines.
12685 	 */
12686 	start[0] = RAW;
12687 	start[1] = NXT(1);
12688 	start[2] = NXT(2);
12689 	start[3] = NXT(3);
12690 	enc = xmlDetectCharEncoding(start, 4);
12691 	if (enc != XML_CHAR_ENCODING_NONE) {
12692 	    xmlSwitchEncoding(ctxt, enc);
12693 	}
12694     }
12695 
12696     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12697 
12698     if (ctxt->myDoc != NULL) {
12699 	if (ctxt->wellFormed) {
12700 	    ret = ctxt->myDoc->extSubset;
12701 	    ctxt->myDoc->extSubset = NULL;
12702 	    if (ret != NULL) {
12703 		xmlNodePtr tmp;
12704 
12705 		ret->doc = NULL;
12706 		tmp = ret->children;
12707 		while (tmp != NULL) {
12708 		    tmp->doc = NULL;
12709 		    tmp = tmp->next;
12710 		}
12711 	    }
12712 	} else {
12713 	    ret = NULL;
12714 	}
12715         xmlFreeDoc(ctxt->myDoc);
12716         ctxt->myDoc = NULL;
12717     }
12718     if (sax != NULL) ctxt->sax = NULL;
12719     xmlFreeParserCtxt(ctxt);
12720 
12721     return(ret);
12722 }
12723 
12724 /**
12725  * xmlSAXParseDTD:
12726  * @sax:  the SAX handler block
12727  * @ExternalID:  a NAME* containing the External ID of the DTD
12728  * @SystemID:  a NAME* containing the URL to the DTD
12729  *
12730  * Load and parse an external subset.
12731  *
12732  * Returns the resulting xmlDtdPtr or NULL in case of error.
12733  */
12734 
12735 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12736 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12737                           const xmlChar *SystemID) {
12738     xmlDtdPtr ret = NULL;
12739     xmlParserCtxtPtr ctxt;
12740     xmlParserInputPtr input = NULL;
12741     xmlCharEncoding enc;
12742     xmlChar* systemIdCanonic;
12743 
12744     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12745 
12746     ctxt = xmlNewParserCtxt();
12747     if (ctxt == NULL) {
12748 	return(NULL);
12749     }
12750 
12751     /* We are loading a DTD */
12752     ctxt->options |= XML_PARSE_DTDLOAD;
12753 
12754     /*
12755      * Set-up the SAX context
12756      */
12757     if (sax != NULL) {
12758 	if (ctxt->sax != NULL)
12759 	    xmlFree(ctxt->sax);
12760         ctxt->sax = sax;
12761         ctxt->userData = ctxt;
12762     }
12763 
12764     /*
12765      * Canonicalise the system ID
12766      */
12767     systemIdCanonic = xmlCanonicPath(SystemID);
12768     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12769 	xmlFreeParserCtxt(ctxt);
12770 	return(NULL);
12771     }
12772 
12773     /*
12774      * Ask the Entity resolver to load the damn thing
12775      */
12776 
12777     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12778 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12779 	                                 systemIdCanonic);
12780     if (input == NULL) {
12781         if (sax != NULL) ctxt->sax = NULL;
12782 	xmlFreeParserCtxt(ctxt);
12783 	if (systemIdCanonic != NULL)
12784 	    xmlFree(systemIdCanonic);
12785 	return(NULL);
12786     }
12787 
12788     /*
12789      * plug some encoding conversion routines here.
12790      */
12791     if (xmlPushInput(ctxt, input) < 0) {
12792         if (sax != NULL) ctxt->sax = NULL;
12793 	xmlFreeParserCtxt(ctxt);
12794 	if (systemIdCanonic != NULL)
12795 	    xmlFree(systemIdCanonic);
12796 	return(NULL);
12797     }
12798     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12799 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12800 	xmlSwitchEncoding(ctxt, enc);
12801     }
12802 
12803     if (input->filename == NULL)
12804 	input->filename = (char *) systemIdCanonic;
12805     else
12806 	xmlFree(systemIdCanonic);
12807     input->line = 1;
12808     input->col = 1;
12809     input->base = ctxt->input->cur;
12810     input->cur = ctxt->input->cur;
12811     input->free = NULL;
12812 
12813     /*
12814      * let's parse that entity knowing it's an external subset.
12815      */
12816     ctxt->inSubset = 2;
12817     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12818     if (ctxt->myDoc == NULL) {
12819 	xmlErrMemory(ctxt, "New Doc failed");
12820         if (sax != NULL) ctxt->sax = NULL;
12821 	xmlFreeParserCtxt(ctxt);
12822 	return(NULL);
12823     }
12824     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12825     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12826 	                               ExternalID, SystemID);
12827     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12828 
12829     if (ctxt->myDoc != NULL) {
12830 	if (ctxt->wellFormed) {
12831 	    ret = ctxt->myDoc->extSubset;
12832 	    ctxt->myDoc->extSubset = NULL;
12833 	    if (ret != NULL) {
12834 		xmlNodePtr tmp;
12835 
12836 		ret->doc = NULL;
12837 		tmp = ret->children;
12838 		while (tmp != NULL) {
12839 		    tmp->doc = NULL;
12840 		    tmp = tmp->next;
12841 		}
12842 	    }
12843 	} else {
12844 	    ret = NULL;
12845 	}
12846         xmlFreeDoc(ctxt->myDoc);
12847         ctxt->myDoc = NULL;
12848     }
12849     if (sax != NULL) ctxt->sax = NULL;
12850     xmlFreeParserCtxt(ctxt);
12851 
12852     return(ret);
12853 }
12854 
12855 
12856 /**
12857  * xmlParseDTD:
12858  * @ExternalID:  a NAME* containing the External ID of the DTD
12859  * @SystemID:  a NAME* containing the URL to the DTD
12860  *
12861  * Load and parse an external subset.
12862  *
12863  * Returns the resulting xmlDtdPtr or NULL in case of error.
12864  */
12865 
12866 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12867 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12868     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12869 }
12870 #endif /* LIBXML_VALID_ENABLED */
12871 
12872 /************************************************************************
12873  *									*
12874  *		Front ends when parsing an Entity			*
12875  *									*
12876  ************************************************************************/
12877 
12878 /**
12879  * xmlParseCtxtExternalEntity:
12880  * @ctx:  the existing parsing context
12881  * @URL:  the URL for the entity to load
12882  * @ID:  the System ID for the entity to load
12883  * @lst:  the return value for the set of parsed nodes
12884  *
12885  * Parse an external general entity within an existing parsing context
12886  * An external general parsed entity is well-formed if it matches the
12887  * production labeled extParsedEnt.
12888  *
12889  * [78] extParsedEnt ::= TextDecl? content
12890  *
12891  * Returns 0 if the entity is well formed, -1 in case of args problem and
12892  *    the parser error code otherwise
12893  */
12894 
12895 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12896 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12897 	               const xmlChar *ID, xmlNodePtr *lst) {
12898     xmlParserCtxtPtr ctxt;
12899     xmlDocPtr newDoc;
12900     xmlNodePtr newRoot;
12901     xmlSAXHandlerPtr oldsax = NULL;
12902     int ret = 0;
12903     xmlChar start[4];
12904     xmlCharEncoding enc;
12905 
12906     if (ctx == NULL) return(-1);
12907 
12908     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12909         (ctx->depth > 1024)) {
12910 	return(XML_ERR_ENTITY_LOOP);
12911     }
12912 
12913     if (lst != NULL)
12914         *lst = NULL;
12915     if ((URL == NULL) && (ID == NULL))
12916 	return(-1);
12917     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12918 	return(-1);
12919 
12920     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12921     if (ctxt == NULL) {
12922 	return(-1);
12923     }
12924 
12925     oldsax = ctxt->sax;
12926     ctxt->sax = ctx->sax;
12927     xmlDetectSAX2(ctxt);
12928     newDoc = xmlNewDoc(BAD_CAST "1.0");
12929     if (newDoc == NULL) {
12930 	xmlFreeParserCtxt(ctxt);
12931 	return(-1);
12932     }
12933     newDoc->properties = XML_DOC_INTERNAL;
12934     if (ctx->myDoc->dict) {
12935 	newDoc->dict = ctx->myDoc->dict;
12936 	xmlDictReference(newDoc->dict);
12937     }
12938     if (ctx->myDoc != NULL) {
12939 	newDoc->intSubset = ctx->myDoc->intSubset;
12940 	newDoc->extSubset = ctx->myDoc->extSubset;
12941     }
12942     if (ctx->myDoc->URL != NULL) {
12943 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12944     }
12945     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12946     if (newRoot == NULL) {
12947 	ctxt->sax = oldsax;
12948 	xmlFreeParserCtxt(ctxt);
12949 	newDoc->intSubset = NULL;
12950 	newDoc->extSubset = NULL;
12951         xmlFreeDoc(newDoc);
12952 	return(-1);
12953     }
12954     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12955     nodePush(ctxt, newDoc->children);
12956     if (ctx->myDoc == NULL) {
12957 	ctxt->myDoc = newDoc;
12958     } else {
12959 	ctxt->myDoc = ctx->myDoc;
12960 	newDoc->children->doc = ctx->myDoc;
12961     }
12962 
12963     /*
12964      * Get the 4 first bytes and decode the charset
12965      * if enc != XML_CHAR_ENCODING_NONE
12966      * plug some encoding conversion routines.
12967      */
12968     GROW
12969     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12970 	start[0] = RAW;
12971 	start[1] = NXT(1);
12972 	start[2] = NXT(2);
12973 	start[3] = NXT(3);
12974 	enc = xmlDetectCharEncoding(start, 4);
12975 	if (enc != XML_CHAR_ENCODING_NONE) {
12976 	    xmlSwitchEncoding(ctxt, enc);
12977 	}
12978     }
12979 
12980     /*
12981      * Parse a possible text declaration first
12982      */
12983     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12984 	xmlParseTextDecl(ctxt);
12985 	/*
12986 	 * An XML-1.0 document can't reference an entity not XML-1.0
12987 	 */
12988 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12989 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12990 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12991 	                   "Version mismatch between document and entity\n");
12992 	}
12993     }
12994 
12995     /*
12996      * If the user provided its own SAX callbacks then reuse the
12997      * useData callback field, otherwise the expected setup in a
12998      * DOM builder is to have userData == ctxt
12999      */
13000     if (ctx->userData == ctx)
13001         ctxt->userData = ctxt;
13002     else
13003         ctxt->userData = ctx->userData;
13004 
13005     /*
13006      * Doing validity checking on chunk doesn't make sense
13007      */
13008     ctxt->instate = XML_PARSER_CONTENT;
13009     ctxt->validate = ctx->validate;
13010     ctxt->valid = ctx->valid;
13011     ctxt->loadsubset = ctx->loadsubset;
13012     ctxt->depth = ctx->depth + 1;
13013     ctxt->replaceEntities = ctx->replaceEntities;
13014     if (ctxt->validate) {
13015 	ctxt->vctxt.error = ctx->vctxt.error;
13016 	ctxt->vctxt.warning = ctx->vctxt.warning;
13017     } else {
13018 	ctxt->vctxt.error = NULL;
13019 	ctxt->vctxt.warning = NULL;
13020     }
13021     ctxt->vctxt.nodeTab = NULL;
13022     ctxt->vctxt.nodeNr = 0;
13023     ctxt->vctxt.nodeMax = 0;
13024     ctxt->vctxt.node = NULL;
13025     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13026     ctxt->dict = ctx->dict;
13027     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13028     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13029     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13030     ctxt->dictNames = ctx->dictNames;
13031     ctxt->attsDefault = ctx->attsDefault;
13032     ctxt->attsSpecial = ctx->attsSpecial;
13033     ctxt->linenumbers = ctx->linenumbers;
13034 
13035     xmlParseContent(ctxt);
13036 
13037     ctx->validate = ctxt->validate;
13038     ctx->valid = ctxt->valid;
13039     if ((RAW == '<') && (NXT(1) == '/')) {
13040 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13041     } else if (RAW != 0) {
13042 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13043     }
13044     if (ctxt->node != newDoc->children) {
13045 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13046     }
13047 
13048     if (!ctxt->wellFormed) {
13049         if (ctxt->errNo == 0)
13050 	    ret = 1;
13051 	else
13052 	    ret = ctxt->errNo;
13053     } else {
13054 	if (lst != NULL) {
13055 	    xmlNodePtr cur;
13056 
13057 	    /*
13058 	     * Return the newly created nodeset after unlinking it from
13059 	     * they pseudo parent.
13060 	     */
13061 	    cur = newDoc->children->children;
13062 	    *lst = cur;
13063 	    while (cur != NULL) {
13064 		cur->parent = NULL;
13065 		cur = cur->next;
13066 	    }
13067             newDoc->children->children = NULL;
13068 	}
13069 	ret = 0;
13070     }
13071     ctxt->sax = oldsax;
13072     ctxt->dict = NULL;
13073     ctxt->attsDefault = NULL;
13074     ctxt->attsSpecial = NULL;
13075     xmlFreeParserCtxt(ctxt);
13076     newDoc->intSubset = NULL;
13077     newDoc->extSubset = NULL;
13078     xmlFreeDoc(newDoc);
13079 
13080     return(ret);
13081 }
13082 
13083 /**
13084  * xmlParseExternalEntityPrivate:
13085  * @doc:  the document the chunk pertains to
13086  * @oldctxt:  the previous parser context if available
13087  * @sax:  the SAX handler bloc (possibly NULL)
13088  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13089  * @depth:  Used for loop detection, use 0
13090  * @URL:  the URL for the entity to load
13091  * @ID:  the System ID for the entity to load
13092  * @list:  the return value for the set of parsed nodes
13093  *
13094  * Private version of xmlParseExternalEntity()
13095  *
13096  * Returns 0 if the entity is well formed, -1 in case of args problem and
13097  *    the parser error code otherwise
13098  */
13099 
13100 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13101 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13102 	              xmlSAXHandlerPtr sax,
13103 		      void *user_data, int depth, const xmlChar *URL,
13104 		      const xmlChar *ID, xmlNodePtr *list) {
13105     xmlParserCtxtPtr ctxt;
13106     xmlDocPtr newDoc;
13107     xmlNodePtr newRoot;
13108     xmlSAXHandlerPtr oldsax = NULL;
13109     xmlParserErrors ret = XML_ERR_OK;
13110     xmlChar start[4];
13111     xmlCharEncoding enc;
13112 
13113     if (((depth > 40) &&
13114 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13115 	(depth > 1024)) {
13116 	return(XML_ERR_ENTITY_LOOP);
13117     }
13118 
13119     if (list != NULL)
13120         *list = NULL;
13121     if ((URL == NULL) && (ID == NULL))
13122 	return(XML_ERR_INTERNAL_ERROR);
13123     if (doc == NULL)
13124 	return(XML_ERR_INTERNAL_ERROR);
13125 
13126 
13127     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13128     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13129     ctxt->userData = ctxt;
13130     if (oldctxt != NULL) {
13131 	ctxt->_private = oldctxt->_private;
13132 	ctxt->loadsubset = oldctxt->loadsubset;
13133 	ctxt->validate = oldctxt->validate;
13134 	ctxt->external = oldctxt->external;
13135 	ctxt->record_info = oldctxt->record_info;
13136 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13137 	ctxt->node_seq.length = oldctxt->node_seq.length;
13138 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13139     } else {
13140 	/*
13141 	 * Doing validity checking on chunk without context
13142 	 * doesn't make sense
13143 	 */
13144 	ctxt->_private = NULL;
13145 	ctxt->validate = 0;
13146 	ctxt->external = 2;
13147 	ctxt->loadsubset = 0;
13148     }
13149     if (sax != NULL) {
13150 	oldsax = ctxt->sax;
13151         ctxt->sax = sax;
13152 	if (user_data != NULL)
13153 	    ctxt->userData = user_data;
13154     }
13155     xmlDetectSAX2(ctxt);
13156     newDoc = xmlNewDoc(BAD_CAST "1.0");
13157     if (newDoc == NULL) {
13158 	ctxt->node_seq.maximum = 0;
13159 	ctxt->node_seq.length = 0;
13160 	ctxt->node_seq.buffer = NULL;
13161 	xmlFreeParserCtxt(ctxt);
13162 	return(XML_ERR_INTERNAL_ERROR);
13163     }
13164     newDoc->properties = XML_DOC_INTERNAL;
13165     newDoc->intSubset = doc->intSubset;
13166     newDoc->extSubset = doc->extSubset;
13167     newDoc->dict = doc->dict;
13168     xmlDictReference(newDoc->dict);
13169 
13170     if (doc->URL != NULL) {
13171 	newDoc->URL = xmlStrdup(doc->URL);
13172     }
13173     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13174     if (newRoot == NULL) {
13175 	if (sax != NULL)
13176 	    ctxt->sax = oldsax;
13177 	ctxt->node_seq.maximum = 0;
13178 	ctxt->node_seq.length = 0;
13179 	ctxt->node_seq.buffer = NULL;
13180 	xmlFreeParserCtxt(ctxt);
13181 	newDoc->intSubset = NULL;
13182 	newDoc->extSubset = NULL;
13183         xmlFreeDoc(newDoc);
13184 	return(XML_ERR_INTERNAL_ERROR);
13185     }
13186     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13187     nodePush(ctxt, newDoc->children);
13188     ctxt->myDoc = doc;
13189     newRoot->doc = doc;
13190 
13191     /*
13192      * Get the 4 first bytes and decode the charset
13193      * if enc != XML_CHAR_ENCODING_NONE
13194      * plug some encoding conversion routines.
13195      */
13196     GROW;
13197     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13198 	start[0] = RAW;
13199 	start[1] = NXT(1);
13200 	start[2] = NXT(2);
13201 	start[3] = NXT(3);
13202 	enc = xmlDetectCharEncoding(start, 4);
13203 	if (enc != XML_CHAR_ENCODING_NONE) {
13204 	    xmlSwitchEncoding(ctxt, enc);
13205 	}
13206     }
13207 
13208     /*
13209      * Parse a possible text declaration first
13210      */
13211     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13212 	xmlParseTextDecl(ctxt);
13213     }
13214 
13215     ctxt->instate = XML_PARSER_CONTENT;
13216     ctxt->depth = depth;
13217 
13218     xmlParseContent(ctxt);
13219 
13220     if ((RAW == '<') && (NXT(1) == '/')) {
13221 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13222     } else if (RAW != 0) {
13223 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13224     }
13225     if (ctxt->node != newDoc->children) {
13226 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13227     }
13228 
13229     if (!ctxt->wellFormed) {
13230         if (ctxt->errNo == 0)
13231 	    ret = XML_ERR_INTERNAL_ERROR;
13232 	else
13233 	    ret = (xmlParserErrors)ctxt->errNo;
13234     } else {
13235 	if (list != NULL) {
13236 	    xmlNodePtr cur;
13237 
13238 	    /*
13239 	     * Return the newly created nodeset after unlinking it from
13240 	     * they pseudo parent.
13241 	     */
13242 	    cur = newDoc->children->children;
13243 	    *list = cur;
13244 	    while (cur != NULL) {
13245 		cur->parent = NULL;
13246 		cur = cur->next;
13247 	    }
13248             newDoc->children->children = NULL;
13249 	}
13250 	ret = XML_ERR_OK;
13251     }
13252 
13253     /*
13254      * Record in the parent context the number of entities replacement
13255      * done when parsing that reference.
13256      */
13257     if (oldctxt != NULL)
13258         oldctxt->nbentities += ctxt->nbentities;
13259 
13260     /*
13261      * Also record the size of the entity parsed
13262      */
13263     if (ctxt->input != NULL && oldctxt != NULL) {
13264 	oldctxt->sizeentities += ctxt->input->consumed;
13265 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13266     }
13267     /*
13268      * And record the last error if any
13269      */
13270     if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13271         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13272 
13273     if (sax != NULL)
13274 	ctxt->sax = oldsax;
13275     if (oldctxt != NULL) {
13276         oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13277         oldctxt->node_seq.length = ctxt->node_seq.length;
13278         oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13279     }
13280     ctxt->node_seq.maximum = 0;
13281     ctxt->node_seq.length = 0;
13282     ctxt->node_seq.buffer = NULL;
13283     xmlFreeParserCtxt(ctxt);
13284     newDoc->intSubset = NULL;
13285     newDoc->extSubset = NULL;
13286     xmlFreeDoc(newDoc);
13287 
13288     return(ret);
13289 }
13290 
13291 #ifdef LIBXML_SAX1_ENABLED
13292 /**
13293  * xmlParseExternalEntity:
13294  * @doc:  the document the chunk pertains to
13295  * @sax:  the SAX handler bloc (possibly NULL)
13296  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13297  * @depth:  Used for loop detection, use 0
13298  * @URL:  the URL for the entity to load
13299  * @ID:  the System ID for the entity to load
13300  * @lst:  the return value for the set of parsed nodes
13301  *
13302  * Parse an external general entity
13303  * An external general parsed entity is well-formed if it matches the
13304  * production labeled extParsedEnt.
13305  *
13306  * [78] extParsedEnt ::= TextDecl? content
13307  *
13308  * Returns 0 if the entity is well formed, -1 in case of args problem and
13309  *    the parser error code otherwise
13310  */
13311 
13312 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13313 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13314 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13315     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13316 		                       ID, lst));
13317 }
13318 
13319 /**
13320  * xmlParseBalancedChunkMemory:
13321  * @doc:  the document the chunk pertains to
13322  * @sax:  the SAX handler bloc (possibly NULL)
13323  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13324  * @depth:  Used for loop detection, use 0
13325  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13326  * @lst:  the return value for the set of parsed nodes
13327  *
13328  * Parse a well-balanced chunk of an XML document
13329  * called by the parser
13330  * The allowed sequence for the Well Balanced Chunk is the one defined by
13331  * the content production in the XML grammar:
13332  *
13333  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13334  *
13335  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13336  *    the parser error code otherwise
13337  */
13338 
13339 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13340 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13341      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13342     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13343                                                 depth, string, lst, 0 );
13344 }
13345 #endif /* LIBXML_SAX1_ENABLED */
13346 
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed with a temporary memory parser context which
 * borrows the dictionary, SAX handler, options and default/special
 * attribute tables of @oldctxt. The nodes are built below a temporary
 * "pseudoroot" element; the children of the document in use are saved
 * before the parse and put back afterwards.
 *
 * Returns XML_ERR_OK if the chunk is well balanced,
 *    XML_ERR_ENTITY_LOOP if the depth of @oldctxt exceeds the nesting
 *    limit, XML_ERR_INTERNAL_ERROR if @string is NULL or an allocation
 *    fails, XML_WAR_UNDECLARED_ENTITY if the parser context cannot be
 *    created, and the parser error code otherwise
 *
 * The node list is only handed back through @lst when the result is
 * XML_ERR_OK; otherwise *@lst is left NULL.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /*
     * Nesting limit: 40 levels unless XML_PARSE_HUGE is set in the
     * parent context, and never more than 1024.
     */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth >  1024)) {
	return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
	ctxt->userData = user_data;
    else
	ctxt->userData = ctxt;
    /*
     * Replace the new context's dictionary by the parent's one. No
     * reference is taken here, which is why every exit path resets
     * ctxt->dict to NULL before freeing the context.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    /* re-intern the well known strings in the dictionary now in use */
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* use the parent's SAX handler; the context's own one is put back
     * before the context is freed */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
	/* no document yet: parse into a temporary one freed on exit */
	newDoc = xmlNewDoc(BAD_CAST "1.0");
	if (newDoc == NULL) {
	    ctxt->sax = oldsax;
	    ctxt->dict = NULL;
	    xmlFreeParserCtxt(ctxt);
	    return(XML_ERR_INTERNAL_ERROR);
	}
	newDoc->properties = XML_DOC_INTERNAL;
	newDoc->dict = ctxt->dict;
	xmlDictReference(newDoc->dict);
	ctxt->myDoc = newDoc;
    } else {
	/* reuse the parent's document and remember its children so
	 * they can be restored after the parse */
	ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
	last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
	ctxt->sax = oldsax;
	ctxt->dict = NULL;
	xmlFreeParserCtxt(ctxt);
	if (newDoc != NULL) {
	    xmlFreeDoc(newDoc);
	}
	return(XML_ERR_INTERNAL_ERROR);
    }
    /* temporarily make the pseudoroot the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
	/*
	 * ID/IDREF registration will be done in xmlValidateElement below
	 */
	ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    /* borrowed from the parent; reset to NULL before the context is freed */
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    /* anything left in the input means the chunk was not balanced */
    if ((RAW == '<') && (NXT(1) == '/')) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
	    ret = XML_ERR_INTERNAL_ERROR;
	else
	    ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
	xmlNodePtr cur;

	/*
	 * Return the newly created nodeset after unlinking it from
	 * the pseudo parent.
	 */
	cur = ctxt->myDoc->children->children;
	*lst = cur;
	while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
	    /* validate each top level element against the parent's
	     * document when the parent context is validating */
	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
		(cur->type == XML_ELEMENT_NODE)) {
		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
			oldctxt->myDoc, cur);
	    }
#endif /* LIBXML_VALID_ENABLED */
	    cur->parent = NULL;
	    cur = cur->next;
	}
	ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
	/* free the pseudoroot and restore the saved children of the
	 * document */
	xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* detach everything borrowed from the parent before freeing */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
	xmlFreeDoc(newDoc);
    }

    return(ret);
}
13541 
13542 /**
13543  * xmlParseInNodeContext:
13544  * @node:  the context node
13545  * @data:  the input string
13546  * @datalen:  the input string length in bytes
13547  * @options:  a combination of xmlParserOption
13548  * @lst:  the return value for the set of parsed nodes
13549  *
13550  * Parse a well-balanced chunk of an XML document
13551  * within the context (DTD, namespaces, etc ...) of the given node.
13552  *
13553  * The allowed sequence for the data is a Well Balanced Chunk defined by
13554  * the content production in the XML grammar:
13555  *
13556  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13557  *
13558  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13559  * error code otherwise
13560  */
13561 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13562 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13563                       int options, xmlNodePtr *lst) {
13564 #ifdef SAX2
13565     xmlParserCtxtPtr ctxt;
13566     xmlDocPtr doc = NULL;
13567     xmlNodePtr fake, cur;
13568     int nsnr = 0;
13569 
13570     xmlParserErrors ret = XML_ERR_OK;
13571 
13572     /*
13573      * check all input parameters, grab the document
13574      */
13575     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13576         return(XML_ERR_INTERNAL_ERROR);
13577     switch (node->type) {
13578         case XML_ELEMENT_NODE:
13579         case XML_ATTRIBUTE_NODE:
13580         case XML_TEXT_NODE:
13581         case XML_CDATA_SECTION_NODE:
13582         case XML_ENTITY_REF_NODE:
13583         case XML_PI_NODE:
13584         case XML_COMMENT_NODE:
13585         case XML_DOCUMENT_NODE:
13586         case XML_HTML_DOCUMENT_NODE:
13587 	    break;
13588 	default:
13589 	    return(XML_ERR_INTERNAL_ERROR);
13590 
13591     }
13592     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13593            (node->type != XML_DOCUMENT_NODE) &&
13594 	   (node->type != XML_HTML_DOCUMENT_NODE))
13595 	node = node->parent;
13596     if (node == NULL)
13597 	return(XML_ERR_INTERNAL_ERROR);
13598     if (node->type == XML_ELEMENT_NODE)
13599 	doc = node->doc;
13600     else
13601         doc = (xmlDocPtr) node;
13602     if (doc == NULL)
13603 	return(XML_ERR_INTERNAL_ERROR);
13604 
13605     /*
13606      * allocate a context and set-up everything not related to the
13607      * node position in the tree
13608      */
13609     if (doc->type == XML_DOCUMENT_NODE)
13610 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13611 #ifdef LIBXML_HTML_ENABLED
13612     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13613 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13614         /*
13615          * When parsing in context, it makes no sense to add implied
13616          * elements like html/body/etc...
13617          */
13618         options |= HTML_PARSE_NOIMPLIED;
13619     }
13620 #endif
13621     else
13622         return(XML_ERR_INTERNAL_ERROR);
13623 
13624     if (ctxt == NULL)
13625         return(XML_ERR_NO_MEMORY);
13626 
13627     /*
13628      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13629      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13630      * we must wait until the last moment to free the original one.
13631      */
13632     if (doc->dict != NULL) {
13633         if (ctxt->dict != NULL)
13634 	    xmlDictFree(ctxt->dict);
13635 	ctxt->dict = doc->dict;
13636     } else
13637         options |= XML_PARSE_NODICT;
13638 
13639     if (doc->encoding != NULL) {
13640         xmlCharEncodingHandlerPtr hdlr;
13641 
13642         if (ctxt->encoding != NULL)
13643 	    xmlFree((xmlChar *) ctxt->encoding);
13644         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13645 
13646         hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13647         if (hdlr != NULL) {
13648             xmlSwitchToEncoding(ctxt, hdlr);
13649 	} else {
13650             return(XML_ERR_UNSUPPORTED_ENCODING);
13651         }
13652     }
13653 
13654     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13655     xmlDetectSAX2(ctxt);
13656     ctxt->myDoc = doc;
13657     /* parsing in context, i.e. as within existing content */
13658     ctxt->input_id = 2;
13659     ctxt->instate = XML_PARSER_CONTENT;
13660 
13661     fake = xmlNewComment(NULL);
13662     if (fake == NULL) {
13663         xmlFreeParserCtxt(ctxt);
13664 	return(XML_ERR_NO_MEMORY);
13665     }
13666     xmlAddChild(node, fake);
13667 
13668     if (node->type == XML_ELEMENT_NODE) {
13669 	nodePush(ctxt, node);
13670 	/*
13671 	 * initialize the SAX2 namespaces stack
13672 	 */
13673 	cur = node;
13674 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13675 	    xmlNsPtr ns = cur->nsDef;
13676 	    const xmlChar *iprefix, *ihref;
13677 
13678 	    while (ns != NULL) {
13679 		if (ctxt->dict) {
13680 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13681 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13682 		} else {
13683 		    iprefix = ns->prefix;
13684 		    ihref = ns->href;
13685 		}
13686 
13687 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13688 		    nsPush(ctxt, iprefix, ihref);
13689 		    nsnr++;
13690 		}
13691 		ns = ns->next;
13692 	    }
13693 	    cur = cur->parent;
13694 	}
13695     }
13696 
13697     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13698 	/*
13699 	 * ID/IDREF registration will be done in xmlValidateElement below
13700 	 */
13701 	ctxt->loadsubset |= XML_SKIP_IDS;
13702     }
13703 
13704 #ifdef LIBXML_HTML_ENABLED
13705     if (doc->type == XML_HTML_DOCUMENT_NODE)
13706         __htmlParseContent(ctxt);
13707     else
13708 #endif
13709 	xmlParseContent(ctxt);
13710 
13711     nsPop(ctxt, nsnr);
13712     if ((RAW == '<') && (NXT(1) == '/')) {
13713 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13714     } else if (RAW != 0) {
13715 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13716     }
13717     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13718 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13719 	ctxt->wellFormed = 0;
13720     }
13721 
13722     if (!ctxt->wellFormed) {
13723         if (ctxt->errNo == 0)
13724 	    ret = XML_ERR_INTERNAL_ERROR;
13725 	else
13726 	    ret = (xmlParserErrors)ctxt->errNo;
13727     } else {
13728         ret = XML_ERR_OK;
13729     }
13730 
13731     /*
13732      * Return the newly created nodeset after unlinking it from
13733      * the pseudo sibling.
13734      */
13735 
13736     cur = fake->next;
13737     fake->next = NULL;
13738     node->last = fake;
13739 
13740     if (cur != NULL) {
13741 	cur->prev = NULL;
13742     }
13743 
13744     *lst = cur;
13745 
13746     while (cur != NULL) {
13747 	cur->parent = NULL;
13748 	cur = cur->next;
13749     }
13750 
13751     xmlUnlinkNode(fake);
13752     xmlFreeNode(fake);
13753 
13754 
13755     if (ret != XML_ERR_OK) {
13756         xmlFreeNodeList(*lst);
13757 	*lst = NULL;
13758     }
13759 
13760     if (doc->dict != NULL)
13761         ctxt->dict = NULL;
13762     xmlFreeParserCtxt(ctxt);
13763 
13764     return(ret);
13765 #else /* !SAX2 */
13766     return(XML_ERR_INTERNAL_ERROR);
13767 #endif
13768 }
13769 
13770 #ifdef LIBXML_SAX1_ENABLED
13771 /**
13772  * xmlParseBalancedChunkMemoryRecover:
13773  * @doc:  the document the chunk pertains to
13774  * @sax:  the SAX handler bloc (possibly NULL)
13775  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13776  * @depth:  Used for loop detection, use 0
13777  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13778  * @lst:  the return value for the set of parsed nodes
13779  * @recover: return nodes even if the data is broken (use 0)
13780  *
13781  *
13782  * Parse a well-balanced chunk of an XML document
13783  * called by the parser
13784  * The allowed sequence for the Well Balanced Chunk is the one defined by
13785  * the content production in the XML grammar:
13786  *
13787  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13788  *
13789  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13790  *    the parser error code otherwise
13791  *
13792  * In case recover is set to 1, the nodelist will not be empty even if
13793  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13794  * some extent.
13795  */
13796 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13797 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13798      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13799      int recover) {
13800     xmlParserCtxtPtr ctxt;
13801     xmlDocPtr newDoc;
13802     xmlSAXHandlerPtr oldsax = NULL;
13803     xmlNodePtr content, newRoot;
13804     int size;
13805     int ret = 0;
13806 
13807     if (depth > 40) {
13808 	return(XML_ERR_ENTITY_LOOP);
13809     }
13810 
13811 
13812     if (lst != NULL)
13813         *lst = NULL;
13814     if (string == NULL)
13815         return(-1);
13816 
13817     size = xmlStrlen(string);
13818 
13819     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13820     if (ctxt == NULL) return(-1);
13821     ctxt->userData = ctxt;
13822     if (sax != NULL) {
13823 	oldsax = ctxt->sax;
13824         ctxt->sax = sax;
13825 	if (user_data != NULL)
13826 	    ctxt->userData = user_data;
13827     }
13828     newDoc = xmlNewDoc(BAD_CAST "1.0");
13829     if (newDoc == NULL) {
13830 	xmlFreeParserCtxt(ctxt);
13831 	return(-1);
13832     }
13833     newDoc->properties = XML_DOC_INTERNAL;
13834     if ((doc != NULL) && (doc->dict != NULL)) {
13835         xmlDictFree(ctxt->dict);
13836 	ctxt->dict = doc->dict;
13837 	xmlDictReference(ctxt->dict);
13838 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13839 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13840 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13841 	ctxt->dictNames = 1;
13842     } else {
13843 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13844     }
13845     if (doc != NULL) {
13846 	newDoc->intSubset = doc->intSubset;
13847 	newDoc->extSubset = doc->extSubset;
13848     }
13849     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13850     if (newRoot == NULL) {
13851 	if (sax != NULL)
13852 	    ctxt->sax = oldsax;
13853 	xmlFreeParserCtxt(ctxt);
13854 	newDoc->intSubset = NULL;
13855 	newDoc->extSubset = NULL;
13856         xmlFreeDoc(newDoc);
13857 	return(-1);
13858     }
13859     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13860     nodePush(ctxt, newRoot);
13861     if (doc == NULL) {
13862 	ctxt->myDoc = newDoc;
13863     } else {
13864 	ctxt->myDoc = newDoc;
13865 	newDoc->children->doc = doc;
13866 	/* Ensure that doc has XML spec namespace */
13867 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13868 	newDoc->oldNs = doc->oldNs;
13869     }
13870     ctxt->instate = XML_PARSER_CONTENT;
13871     ctxt->input_id = 2;
13872     ctxt->depth = depth;
13873 
13874     /*
13875      * Doing validity checking on chunk doesn't make sense
13876      */
13877     ctxt->validate = 0;
13878     ctxt->loadsubset = 0;
13879     xmlDetectSAX2(ctxt);
13880 
13881     if ( doc != NULL ){
13882         content = doc->children;
13883         doc->children = NULL;
13884         xmlParseContent(ctxt);
13885         doc->children = content;
13886     }
13887     else {
13888         xmlParseContent(ctxt);
13889     }
13890     if ((RAW == '<') && (NXT(1) == '/')) {
13891 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13892     } else if (RAW != 0) {
13893 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13894     }
13895     if (ctxt->node != newDoc->children) {
13896 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13897     }
13898 
13899     if (!ctxt->wellFormed) {
13900         if (ctxt->errNo == 0)
13901 	    ret = 1;
13902 	else
13903 	    ret = ctxt->errNo;
13904     } else {
13905       ret = 0;
13906     }
13907 
13908     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13909 	xmlNodePtr cur;
13910 
13911 	/*
13912 	 * Return the newly created nodeset after unlinking it from
13913 	 * they pseudo parent.
13914 	 */
13915 	cur = newDoc->children->children;
13916 	*lst = cur;
13917 	while (cur != NULL) {
13918 	    xmlSetTreeDoc(cur, doc);
13919 	    cur->parent = NULL;
13920 	    cur = cur->next;
13921 	}
13922 	newDoc->children->children = NULL;
13923     }
13924 
13925     if (sax != NULL)
13926 	ctxt->sax = oldsax;
13927     xmlFreeParserCtxt(ctxt);
13928     newDoc->intSubset = NULL;
13929     newDoc->extSubset = NULL;
13930     newDoc->oldNs = NULL;
13931     xmlFreeDoc(newDoc);
13932 
13933     return(ret);
13934 }
13935 
13936 /**
13937  * xmlSAXParseEntity:
13938  * @sax:  the SAX handler block
13939  * @filename:  the filename
13940  *
13941  * parse an XML external entity out of context and build a tree.
13942  * It use the given SAX function block to handle the parsing callback.
13943  * If sax is NULL, fallback to the default DOM tree building routines.
13944  *
13945  * [78] extParsedEnt ::= TextDecl? content
13946  *
13947  * This correspond to a "Well Balanced" chunk
13948  *
13949  * Returns the resulting document tree
13950  */
13951 
13952 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13953 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13954     xmlDocPtr ret;
13955     xmlParserCtxtPtr ctxt;
13956 
13957     ctxt = xmlCreateFileParserCtxt(filename);
13958     if (ctxt == NULL) {
13959 	return(NULL);
13960     }
13961     if (sax != NULL) {
13962 	if (ctxt->sax != NULL)
13963 	    xmlFree(ctxt->sax);
13964         ctxt->sax = sax;
13965         ctxt->userData = NULL;
13966     }
13967 
13968     xmlParseExtParsedEnt(ctxt);
13969 
13970     if (ctxt->wellFormed)
13971 	ret = ctxt->myDoc;
13972     else {
13973         ret = NULL;
13974         xmlFreeDoc(ctxt->myDoc);
13975         ctxt->myDoc = NULL;
13976     }
13977     if (sax != NULL)
13978         ctxt->sax = NULL;
13979     xmlFreeParserCtxt(ctxt);
13980 
13981     return(ret);
13982 }
13983 
13984 /**
13985  * xmlParseEntity:
13986  * @filename:  the filename
13987  *
13988  * parse an XML external entity out of context and build a tree.
13989  *
13990  * [78] extParsedEnt ::= TextDecl? content
13991  *
13992  * This correspond to a "Well Balanced" chunk
13993  *
13994  * Returns the resulting document tree
13995  */
13996 
13997 xmlDocPtr
xmlParseEntity(const char * filename)13998 xmlParseEntity(const char *filename) {
13999     return(xmlSAXParseEntity(NULL, filename));
14000 }
14001 #endif /* LIBXML_SAX1_ENABLED */
14002 
14003 /**
14004  * xmlCreateEntityParserCtxtInternal:
14005  * @URL:  the entity URL
14006  * @ID:  the entity PUBLIC ID
14007  * @base:  a possible base for the target URI
14008  * @pctx:  parser context used to set options on new context
14009  *
14010  * Create a parser context for an external entity
14011  * Automatic support for ZLIB/Compress compressed document is provided
14012  * by default if found at compile-time.
14013  *
14014  * Returns the new parser context or NULL
14015  */
14016 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14017 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14018 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
14019     xmlParserCtxtPtr ctxt;
14020     xmlParserInputPtr inputStream;
14021     char *directory = NULL;
14022     xmlChar *uri;
14023 
14024     ctxt = xmlNewParserCtxt();
14025     if (ctxt == NULL) {
14026 	return(NULL);
14027     }
14028 
14029     if (pctx != NULL) {
14030         ctxt->options = pctx->options;
14031         ctxt->_private = pctx->_private;
14032 	/*
14033 	 * this is a subparser of pctx, so the input_id should be
14034 	 * incremented to distinguish from main entity
14035 	 */
14036 	ctxt->input_id = pctx->input_id + 1;
14037     }
14038 
14039     uri = xmlBuildURI(URL, base);
14040 
14041     if (uri == NULL) {
14042 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14043 	if (inputStream == NULL) {
14044 	    xmlFreeParserCtxt(ctxt);
14045 	    return(NULL);
14046 	}
14047 
14048 	inputPush(ctxt, inputStream);
14049 
14050 	if ((ctxt->directory == NULL) && (directory == NULL))
14051 	    directory = xmlParserGetDirectory((char *)URL);
14052 	if ((ctxt->directory == NULL) && (directory != NULL))
14053 	    ctxt->directory = directory;
14054     } else {
14055 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14056 	if (inputStream == NULL) {
14057 	    xmlFree(uri);
14058 	    xmlFreeParserCtxt(ctxt);
14059 	    return(NULL);
14060 	}
14061 
14062 	inputPush(ctxt, inputStream);
14063 
14064 	if ((ctxt->directory == NULL) && (directory == NULL))
14065 	    directory = xmlParserGetDirectory((char *)uri);
14066 	if ((ctxt->directory == NULL) && (directory != NULL))
14067 	    ctxt->directory = directory;
14068 	xmlFree(uri);
14069     }
14070     return(ctxt);
14071 }
14072 
14073 /**
14074  * xmlCreateEntityParserCtxt:
14075  * @URL:  the entity URL
14076  * @ID:  the entity PUBLIC ID
14077  * @base:  a possible base for the target URI
14078  *
14079  * Create a parser context for an external entity
14080  * Automatic support for ZLIB/Compress compressed document is provided
14081  * by default if found at compile-time.
14082  *
14083  * Returns the new parser context or NULL
14084  */
14085 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14086 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14087 	                  const xmlChar *base) {
14088     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14089 
14090 }
14091 
14092 /************************************************************************
14093  *									*
14094  *		Front ends when parsing from a file			*
14095  *									*
14096  ************************************************************************/
14097 
14098 /**
14099  * xmlCreateURLParserCtxt:
14100  * @filename:  the filename or URL
14101  * @options:  a combination of xmlParserOption
14102  *
14103  * Create a parser context for a file or URL content.
14104  * Automatic support for ZLIB/Compress compressed document is provided
14105  * by default if found at compile-time and for file accesses
14106  *
14107  * Returns the new parser context or NULL
14108  */
14109 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14110 xmlCreateURLParserCtxt(const char *filename, int options)
14111 {
14112     xmlParserCtxtPtr ctxt;
14113     xmlParserInputPtr inputStream;
14114     char *directory = NULL;
14115 
14116     ctxt = xmlNewParserCtxt();
14117     if (ctxt == NULL) {
14118 	xmlErrMemory(NULL, "cannot allocate parser context");
14119 	return(NULL);
14120     }
14121 
14122     if (options)
14123 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14124     ctxt->linenumbers = 1;
14125 
14126     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14127     if (inputStream == NULL) {
14128 	xmlFreeParserCtxt(ctxt);
14129 	return(NULL);
14130     }
14131 
14132     inputPush(ctxt, inputStream);
14133     if ((ctxt->directory == NULL) && (directory == NULL))
14134         directory = xmlParserGetDirectory(filename);
14135     if ((ctxt->directory == NULL) && (directory != NULL))
14136         ctxt->directory = directory;
14137 
14138     return(ctxt);
14139 }
14140 
14141 /**
14142  * xmlCreateFileParserCtxt:
14143  * @filename:  the filename
14144  *
14145  * Create a parser context for a file content.
14146  * Automatic support for ZLIB/Compress compressed document is provided
14147  * by default if found at compile-time.
14148  *
14149  * Returns the new parser context or NULL
14150  */
14151 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14152 xmlCreateFileParserCtxt(const char *filename)
14153 {
14154     return(xmlCreateURLParserCtxt(filename, 0));
14155 }
14156 
14157 #ifdef LIBXML_SAX1_ENABLED
14158 /**
14159  * xmlSAXParseFileWithData:
14160  * @sax:  the SAX handler block
14161  * @filename:  the filename
14162  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14163  *             documents
14164  * @data:  the userdata
14165  *
14166  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167  * compressed document is provided by default if found at compile-time.
14168  * It use the given SAX function block to handle the parsing callback.
14169  * If sax is NULL, fallback to the default DOM tree building routines.
14170  *
14171  * User data (void *) is stored within the parser context in the
14172  * context's _private member, so it is available nearly everywhere in libxml
14173  *
14174  * Returns the resulting document tree
14175  */
14176 
14177 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14178 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14179                         int recovery, void *data) {
14180     xmlDocPtr ret;
14181     xmlParserCtxtPtr ctxt;
14182 
14183     xmlInitParser();
14184 
14185     ctxt = xmlCreateFileParserCtxt(filename);
14186     if (ctxt == NULL) {
14187 	return(NULL);
14188     }
14189     if (sax != NULL) {
14190 	if (ctxt->sax != NULL)
14191 	    xmlFree(ctxt->sax);
14192         ctxt->sax = sax;
14193     }
14194     xmlDetectSAX2(ctxt);
14195     if (data!=NULL) {
14196 	ctxt->_private = data;
14197     }
14198 
14199     if (ctxt->directory == NULL)
14200         ctxt->directory = xmlParserGetDirectory(filename);
14201 
14202     ctxt->recovery = recovery;
14203 
14204     xmlParseDocument(ctxt);
14205 
14206     if ((ctxt->wellFormed) || recovery) {
14207         ret = ctxt->myDoc;
14208 	if (ret != NULL) {
14209 	    if (ctxt->input->buf->compressed > 0)
14210 		ret->compression = 9;
14211 	    else
14212 		ret->compression = ctxt->input->buf->compressed;
14213 	}
14214     }
14215     else {
14216        ret = NULL;
14217        xmlFreeDoc(ctxt->myDoc);
14218        ctxt->myDoc = NULL;
14219     }
14220     if (sax != NULL)
14221         ctxt->sax = NULL;
14222     xmlFreeParserCtxt(ctxt);
14223 
14224     return(ret);
14225 }
14226 
14227 /**
14228  * xmlSAXParseFile:
14229  * @sax:  the SAX handler block
14230  * @filename:  the filename
14231  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14232  *             documents
14233  *
14234  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14235  * compressed document is provided by default if found at compile-time.
14236  * It use the given SAX function block to handle the parsing callback.
14237  * If sax is NULL, fallback to the default DOM tree building routines.
14238  *
14239  * Returns the resulting document tree
14240  */
14241 
14242 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14243 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14244                           int recovery) {
14245     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14246 }
14247 
14248 /**
14249  * xmlRecoverDoc:
14250  * @cur:  a pointer to an array of xmlChar
14251  *
14252  * parse an XML in-memory document and build a tree.
14253  * In the case the document is not Well Formed, a attempt to build a
14254  * tree is tried anyway
14255  *
14256  * Returns the resulting document tree or NULL in case of failure
14257  */
14258 
14259 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14260 xmlRecoverDoc(const xmlChar *cur) {
14261     return(xmlSAXParseDoc(NULL, cur, 1));
14262 }
14263 
14264 /**
14265  * xmlParseFile:
14266  * @filename:  the filename
14267  *
14268  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14269  * compressed document is provided by default if found at compile-time.
14270  *
14271  * Returns the resulting document tree if the file was wellformed,
14272  * NULL otherwise.
14273  */
14274 
14275 xmlDocPtr
xmlParseFile(const char * filename)14276 xmlParseFile(const char *filename) {
14277     return(xmlSAXParseFile(NULL, filename, 0));
14278 }
14279 
14280 /**
14281  * xmlRecoverFile:
14282  * @filename:  the filename
14283  *
14284  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14285  * compressed document is provided by default if found at compile-time.
14286  * In the case the document is not Well Formed, it attempts to build
14287  * a tree anyway
14288  *
14289  * Returns the resulting document tree or NULL in case of failure
14290  */
14291 
14292 xmlDocPtr
xmlRecoverFile(const char * filename)14293 xmlRecoverFile(const char *filename) {
14294     return(xmlSAXParseFile(NULL, filename, 1));
14295 }
14296 
14297 
14298 /**
14299  * xmlSetupParserForBuffer:
14300  * @ctxt:  an XML parser context
14301  * @buffer:  a xmlChar * buffer
14302  * @filename:  a file name
14303  *
14304  * Setup the parser context to parse a new buffer; Clears any prior
14305  * contents from the parser context. The buffer parameter must not be
14306  * NULL, but the filename parameter can be
14307  */
14308 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14309 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14310                              const char* filename)
14311 {
14312     xmlParserInputPtr input;
14313 
14314     if ((ctxt == NULL) || (buffer == NULL))
14315         return;
14316 
14317     input = xmlNewInputStream(ctxt);
14318     if (input == NULL) {
14319         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14320         xmlClearParserCtxt(ctxt);
14321         return;
14322     }
14323 
14324     xmlClearParserCtxt(ctxt);
14325     if (filename != NULL)
14326         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14327     input->base = buffer;
14328     input->cur = buffer;
14329     input->end = &buffer[xmlStrlen(buffer)];
14330     inputPush(ctxt, input);
14331 }
14332 
14333 /**
14334  * xmlSAXUserParseFile:
14335  * @sax:  a SAX handler
14336  * @user_data:  The user data returned on SAX callbacks
14337  * @filename:  a file name
14338  *
14339  * parse an XML file and call the given SAX handler routines.
14340  * Automatic support for ZLIB/Compress compressed document is provided
14341  *
14342  * Returns 0 in case of success or a error number otherwise
14343  */
14344 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14345 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14346                     const char *filename) {
14347     int ret = 0;
14348     xmlParserCtxtPtr ctxt;
14349 
14350     ctxt = xmlCreateFileParserCtxt(filename);
14351     if (ctxt == NULL) return -1;
14352     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14353 	xmlFree(ctxt->sax);
14354     ctxt->sax = sax;
14355     xmlDetectSAX2(ctxt);
14356 
14357     if (user_data != NULL)
14358 	ctxt->userData = user_data;
14359 
14360     xmlParseDocument(ctxt);
14361 
14362     if (ctxt->wellFormed)
14363 	ret = 0;
14364     else {
14365         if (ctxt->errNo != 0)
14366 	    ret = ctxt->errNo;
14367 	else
14368 	    ret = -1;
14369     }
14370     if (sax != NULL)
14371 	ctxt->sax = NULL;
14372     if (ctxt->myDoc != NULL) {
14373         xmlFreeDoc(ctxt->myDoc);
14374 	ctxt->myDoc = NULL;
14375     }
14376     xmlFreeParserCtxt(ctxt);
14377 
14378     return ret;
14379 }
14380 #endif /* LIBXML_SAX1_ENABLED */
14381 
14382 /************************************************************************
14383  *									*
14384  *		Front ends when parsing from memory			*
14385  *									*
14386  ************************************************************************/
14387 
14388 /**
14389  * xmlCreateMemoryParserCtxt:
14390  * @buffer:  a pointer to a char array
14391  * @size:  the size of the array
14392  *
14393  * Create a parser context for an XML in-memory document.
14394  *
14395  * Returns the new parser context or NULL
14396  */
14397 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14398 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14399     xmlParserCtxtPtr ctxt;
14400     xmlParserInputPtr input;
14401     xmlParserInputBufferPtr buf;
14402 
14403     if (buffer == NULL)
14404 	return(NULL);
14405     if (size <= 0)
14406 	return(NULL);
14407 
14408     ctxt = xmlNewParserCtxt();
14409     if (ctxt == NULL)
14410 	return(NULL);
14411 
14412     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14413     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14414     if (buf == NULL) {
14415 	xmlFreeParserCtxt(ctxt);
14416 	return(NULL);
14417     }
14418 
14419     input = xmlNewInputStream(ctxt);
14420     if (input == NULL) {
14421 	xmlFreeParserInputBuffer(buf);
14422 	xmlFreeParserCtxt(ctxt);
14423 	return(NULL);
14424     }
14425 
14426     input->filename = NULL;
14427     input->buf = buf;
14428     xmlBufResetInput(input->buf->buffer, input);
14429 
14430     inputPush(ctxt, input);
14431     return(ctxt);
14432 }
14433 
14434 #ifdef LIBXML_SAX1_ENABLED
14435 /**
14436  * xmlSAXParseMemoryWithData:
14437  * @sax:  the SAX handler block
14438  * @buffer:  an pointer to a char array
14439  * @size:  the size of the array
14440  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14441  *             documents
14442  * @data:  the userdata
14443  *
14444  * parse an XML in-memory block and use the given SAX function block
14445  * to handle the parsing callback. If sax is NULL, fallback to the default
14446  * DOM tree building routines.
14447  *
14448  * User data (void *) is stored within the parser context in the
14449  * context's _private member, so it is available nearly everywhere in libxml
14450  *
14451  * Returns the resulting document tree
14452  */
14453 
14454 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14455 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14456 	          int size, int recovery, void *data) {
14457     xmlDocPtr ret;
14458     xmlParserCtxtPtr ctxt;
14459 
14460     xmlInitParser();
14461 
14462     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14463     if (ctxt == NULL) return(NULL);
14464     if (sax != NULL) {
14465 	if (ctxt->sax != NULL)
14466 	    xmlFree(ctxt->sax);
14467         ctxt->sax = sax;
14468     }
14469     xmlDetectSAX2(ctxt);
14470     if (data!=NULL) {
14471 	ctxt->_private=data;
14472     }
14473 
14474     ctxt->recovery = recovery;
14475 
14476     xmlParseDocument(ctxt);
14477 
14478     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14479     else {
14480        ret = NULL;
14481        xmlFreeDoc(ctxt->myDoc);
14482        ctxt->myDoc = NULL;
14483     }
14484     if (sax != NULL)
14485 	ctxt->sax = NULL;
14486     xmlFreeParserCtxt(ctxt);
14487 
14488     return(ret);
14489 }
14490 
14491 /**
14492  * xmlSAXParseMemory:
14493  * @sax:  the SAX handler block
14494  * @buffer:  an pointer to a char array
14495  * @size:  the size of the array
14496  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14497  *             documents
14498  *
14499  * parse an XML in-memory block and use the given SAX function block
14500  * to handle the parsing callback. If sax is NULL, fallback to the default
14501  * DOM tree building routines.
14502  *
14503  * Returns the resulting document tree
14504  */
14505 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14506 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14507 	          int size, int recovery) {
14508     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14509 }
14510 
14511 /**
14512  * xmlParseMemory:
14513  * @buffer:  an pointer to a char array
14514  * @size:  the size of the array
14515  *
14516  * parse an XML in-memory block and build a tree.
14517  *
14518  * Returns the resulting document tree
14519  */
14520 
xmlParseMemory(const char * buffer,int size)14521 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14522    return(xmlSAXParseMemory(NULL, buffer, size, 0));
14523 }
14524 
14525 /**
14526  * xmlRecoverMemory:
14527  * @buffer:  an pointer to a char array
14528  * @size:  the size of the array
14529  *
14530  * parse an XML in-memory block and build a tree.
14531  * In the case the document is not Well Formed, an attempt to
14532  * build a tree is tried anyway
14533  *
14534  * Returns the resulting document tree or NULL in case of error
14535  */
14536 
xmlRecoverMemory(const char * buffer,int size)14537 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14538    return(xmlSAXParseMemory(NULL, buffer, size, 1));
14539 }
14540 
14541 /**
14542  * xmlSAXUserParseMemory:
14543  * @sax:  a SAX handler
14544  * @user_data:  The user data returned on SAX callbacks
14545  * @buffer:  an in-memory XML document input
14546  * @size:  the length of the XML document in bytes
14547  *
14548  * A better SAX parsing routine.
14549  * parse an XML in-memory buffer and call the given SAX handler routines.
14550  *
14551  * Returns 0 in case of success or a error number otherwise
14552  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14553 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14554 			  const char *buffer, int size) {
14555     int ret = 0;
14556     xmlParserCtxtPtr ctxt;
14557 
14558     xmlInitParser();
14559 
14560     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14561     if (ctxt == NULL) return -1;
14562     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14563         xmlFree(ctxt->sax);
14564     ctxt->sax = sax;
14565     xmlDetectSAX2(ctxt);
14566 
14567     if (user_data != NULL)
14568 	ctxt->userData = user_data;
14569 
14570     xmlParseDocument(ctxt);
14571 
14572     if (ctxt->wellFormed)
14573 	ret = 0;
14574     else {
14575         if (ctxt->errNo != 0)
14576 	    ret = ctxt->errNo;
14577 	else
14578 	    ret = -1;
14579     }
14580     if (sax != NULL)
14581         ctxt->sax = NULL;
14582     if (ctxt->myDoc != NULL) {
14583         xmlFreeDoc(ctxt->myDoc);
14584 	ctxt->myDoc = NULL;
14585     }
14586     xmlFreeParserCtxt(ctxt);
14587 
14588     return ret;
14589 }
14590 #endif /* LIBXML_SAX1_ENABLED */
14591 
14592 /**
14593  * xmlCreateDocParserCtxt:
14594  * @cur:  a pointer to an array of xmlChar
14595  *
14596  * Creates a parser context for an XML in-memory document.
14597  *
14598  * Returns the new parser context or NULL
14599  */
14600 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14601 xmlCreateDocParserCtxt(const xmlChar *cur) {
14602     int len;
14603 
14604     if (cur == NULL)
14605 	return(NULL);
14606     len = xmlStrlen(cur);
14607     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14608 }
14609 
14610 #ifdef LIBXML_SAX1_ENABLED
14611 /**
14612  * xmlSAXParseDoc:
14613  * @sax:  the SAX handler block
14614  * @cur:  a pointer to an array of xmlChar
14615  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14616  *             documents
14617  *
14618  * parse an XML in-memory document and build a tree.
14619  * It use the given SAX function block to handle the parsing callback.
14620  * If sax is NULL, fallback to the default DOM tree building routines.
14621  *
14622  * Returns the resulting document tree
14623  */
14624 
14625 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14626 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14627     xmlDocPtr ret;
14628     xmlParserCtxtPtr ctxt;
14629     xmlSAXHandlerPtr oldsax = NULL;
14630 
14631     if (cur == NULL) return(NULL);
14632 
14633 
14634     ctxt = xmlCreateDocParserCtxt(cur);
14635     if (ctxt == NULL) return(NULL);
14636     if (sax != NULL) {
14637         oldsax = ctxt->sax;
14638         ctxt->sax = sax;
14639         ctxt->userData = NULL;
14640     }
14641     xmlDetectSAX2(ctxt);
14642 
14643     xmlParseDocument(ctxt);
14644     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14645     else {
14646        ret = NULL;
14647        xmlFreeDoc(ctxt->myDoc);
14648        ctxt->myDoc = NULL;
14649     }
14650     if (sax != NULL)
14651 	ctxt->sax = oldsax;
14652     xmlFreeParserCtxt(ctxt);
14653 
14654     return(ret);
14655 }
14656 
14657 /**
14658  * xmlParseDoc:
14659  * @cur:  a pointer to an array of xmlChar
14660  *
14661  * parse an XML in-memory document and build a tree.
14662  *
14663  * Returns the resulting document tree
14664  */
14665 
14666 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14667 xmlParseDoc(const xmlChar *cur) {
14668     return(xmlSAXParseDoc(NULL, cur, 0));
14669 }
14670 #endif /* LIBXML_SAX1_ENABLED */
14671 
14672 #ifdef LIBXML_LEGACY_ENABLED
14673 /************************************************************************
14674  *									*
14675  *	Specific function to keep track of entities references		*
14676  *	and used by the XSLT debugger					*
14677  *									*
14678  ************************************************************************/
14679 
14680 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14681 
14682 /**
14683  * xmlAddEntityReference:
14684  * @ent : A valid entity
14685  * @firstNode : A valid first node for children of entity
14686  * @lastNode : A valid last node of children entity
14687  *
14688  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14689  */
14690 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14691 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14692                       xmlNodePtr lastNode)
14693 {
14694     if (xmlEntityRefFunc != NULL) {
14695         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14696     }
14697 }
14698 
14699 
14700 /**
14701  * xmlSetEntityReferenceFunc:
14702  * @func: A valid function
14703  *
14704  * Set the function to call call back when a xml reference has been made
14705  */
14706 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14707 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14708 {
14709     xmlEntityRefFunc = func;
14710 }
14711 #endif /* LIBXML_LEGACY_ENABLED */
14712 
14713 /************************************************************************
14714  *									*
14715  *				Miscellaneous				*
14716  *									*
14717  ************************************************************************/
14718 
14719 #ifdef LIBXML_XPATH_ENABLED
14720 #include <libxml/xpath.h>
14721 #endif
14722 
14723 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14724 static int xmlParserInitialized = 0;
14725 
14726 /**
14727  * xmlInitParser:
14728  *
14729  * Initialization function for the XML parser.
14730  * This is not reentrant. Call once before processing in case of
14731  * use in multithreaded programs.
14732  */
14733 
14734 void
xmlInitParser(void)14735 xmlInitParser(void) {
14736     if (xmlParserInitialized != 0)
14737 	return;
14738 
14739 #ifdef LIBXML_THREAD_ENABLED
14740     __xmlGlobalInitMutexLock();
14741     if (xmlParserInitialized == 0) {
14742 #endif
14743 	xmlInitThreads();
14744 	xmlInitGlobals();
14745 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14746 	    (xmlGenericError == NULL))
14747 	    initGenericErrorDefaultFunc(NULL);
14748 	xmlInitMemory();
14749         xmlInitializeDict();
14750 	xmlInitCharEncodingHandlers();
14751 	xmlDefaultSAXHandlerInit();
14752 	xmlRegisterDefaultInputCallbacks();
14753 #ifdef LIBXML_OUTPUT_ENABLED
14754 	xmlRegisterDefaultOutputCallbacks();
14755 #endif /* LIBXML_OUTPUT_ENABLED */
14756 #ifdef LIBXML_HTML_ENABLED
14757 	htmlInitAutoClose();
14758 	htmlDefaultSAXHandlerInit();
14759 #endif
14760 #ifdef LIBXML_XPATH_ENABLED
14761 	xmlXPathInit();
14762 #endif
14763 	xmlParserInitialized = 1;
14764 #ifdef LIBXML_THREAD_ENABLED
14765     }
14766     __xmlGlobalInitMutexUnlock();
14767 #endif
14768 }
14769 
14770 /**
14771  * xmlCleanupParser:
14772  *
14773  * This function name is somewhat misleading. It does not clean up
14774  * parser state, it cleans up memory allocated by the library itself.
14775  * It is a cleanup function for the XML library. It tries to reclaim all
14776  * related global memory allocated for the library processing.
14777  * It doesn't deallocate any document related memory. One should
14778  * call xmlCleanupParser() only when the process has finished using
14779  * the library and all XML/HTML documents built with it.
14780  * See also xmlInitParser() which has the opposite function of preparing
14781  * the library for operations.
14782  *
14783  * WARNING: if your application is multithreaded or has plugin support
14784  *          calling this may crash the application if another thread or
14785  *          a plugin is still using libxml2. It's sometimes very hard to
14786  *          guess if libxml2 is in use in the application, some libraries
14787  *          or plugins may use it without notice. In case of doubt abstain
14788  *          from calling this function or do it just before calling exit()
14789  *          to avoid leak reports from valgrind !
14790  */
14791 
14792 void
xmlCleanupParser(void)14793 xmlCleanupParser(void) {
14794     if (!xmlParserInitialized)
14795 	return;
14796 
14797     xmlCleanupCharEncodingHandlers();
14798 #ifdef LIBXML_CATALOG_ENABLED
14799     xmlCatalogCleanup();
14800 #endif
14801     xmlDictCleanup();
14802     xmlCleanupInputCallbacks();
14803 #ifdef LIBXML_OUTPUT_ENABLED
14804     xmlCleanupOutputCallbacks();
14805 #endif
14806 #ifdef LIBXML_SCHEMAS_ENABLED
14807     xmlSchemaCleanupTypes();
14808     xmlRelaxNGCleanupTypes();
14809 #endif
14810     xmlResetLastError();
14811     xmlCleanupGlobals();
14812     xmlCleanupThreads(); /* must be last if called not from the main thread */
14813     xmlCleanupMemory();
14814     xmlParserInitialized = 0;
14815 }
14816 
14817 /************************************************************************
14818  *									*
14819  *	New set (2.6.0) of simpler and more flexible APIs		*
14820  *									*
14821  ************************************************************************/
14822 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string with xmlFree() unless it is NULL or owned by the "dict"
 * dictionary in the current scope. A variable named "dict" must be
 * visible where the macro is used; when it is NULL every non-NULL
 * string is freed. The expansion already ends with a semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14834 
14835 /**
14836  * xmlCtxtReset:
14837  * @ctxt: an XML parser context
14838  *
14839  * Reset a parser context
14840  */
14841 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14842 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14843 {
14844     xmlParserInputPtr input;
14845     xmlDictPtr dict;
14846 
14847     if (ctxt == NULL)
14848         return;
14849 
14850     dict = ctxt->dict;
14851 
14852     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14853         xmlFreeInputStream(input);
14854     }
14855     ctxt->inputNr = 0;
14856     ctxt->input = NULL;
14857 
14858     ctxt->spaceNr = 0;
14859     if (ctxt->spaceTab != NULL) {
14860 	ctxt->spaceTab[0] = -1;
14861 	ctxt->space = &ctxt->spaceTab[0];
14862     } else {
14863         ctxt->space = NULL;
14864     }
14865 
14866 
14867     ctxt->nodeNr = 0;
14868     ctxt->node = NULL;
14869 
14870     ctxt->nameNr = 0;
14871     ctxt->name = NULL;
14872 
14873     DICT_FREE(ctxt->version);
14874     ctxt->version = NULL;
14875     DICT_FREE(ctxt->encoding);
14876     ctxt->encoding = NULL;
14877     DICT_FREE(ctxt->directory);
14878     ctxt->directory = NULL;
14879     DICT_FREE(ctxt->extSubURI);
14880     ctxt->extSubURI = NULL;
14881     DICT_FREE(ctxt->extSubSystem);
14882     ctxt->extSubSystem = NULL;
14883     if (ctxt->myDoc != NULL)
14884         xmlFreeDoc(ctxt->myDoc);
14885     ctxt->myDoc = NULL;
14886 
14887     ctxt->standalone = -1;
14888     ctxt->hasExternalSubset = 0;
14889     ctxt->hasPErefs = 0;
14890     ctxt->html = 0;
14891     ctxt->external = 0;
14892     ctxt->instate = XML_PARSER_START;
14893     ctxt->token = 0;
14894 
14895     ctxt->wellFormed = 1;
14896     ctxt->nsWellFormed = 1;
14897     ctxt->disableSAX = 0;
14898     ctxt->valid = 1;
14899 #if 0
14900     ctxt->vctxt.userData = ctxt;
14901     ctxt->vctxt.error = xmlParserValidityError;
14902     ctxt->vctxt.warning = xmlParserValidityWarning;
14903 #endif
14904     ctxt->record_info = 0;
14905     ctxt->nbChars = 0;
14906     ctxt->checkIndex = 0;
14907     ctxt->inSubset = 0;
14908     ctxt->errNo = XML_ERR_OK;
14909     ctxt->depth = 0;
14910     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14911     ctxt->catalogs = NULL;
14912     ctxt->nbentities = 0;
14913     ctxt->sizeentities = 0;
14914     ctxt->sizeentcopy = 0;
14915     xmlInitNodeInfoSeq(&ctxt->node_seq);
14916 
14917     if (ctxt->attsDefault != NULL) {
14918         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14919         ctxt->attsDefault = NULL;
14920     }
14921     if (ctxt->attsSpecial != NULL) {
14922         xmlHashFree(ctxt->attsSpecial, NULL);
14923         ctxt->attsSpecial = NULL;
14924     }
14925 
14926 #ifdef LIBXML_CATALOG_ENABLED
14927     if (ctxt->catalogs != NULL)
14928 	xmlCatalogFreeLocal(ctxt->catalogs);
14929 #endif
14930     if (ctxt->lastError.code != XML_ERR_OK)
14931         xmlResetError(&ctxt->lastError);
14932 }
14933 
14934 /**
14935  * xmlCtxtResetPush:
14936  * @ctxt: an XML parser context
14937  * @chunk:  a pointer to an array of chars
14938  * @size:  number of chars in the array
14939  * @filename:  an optional file name or URI
14940  * @encoding:  the document encoding, or NULL
14941  *
14942  * Reset a push parser context
14943  *
14944  * Returns 0 in case of success and 1 in case of error
14945  */
14946 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14947 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14948                  int size, const char *filename, const char *encoding)
14949 {
14950     xmlParserInputPtr inputStream;
14951     xmlParserInputBufferPtr buf;
14952     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14953 
14954     if (ctxt == NULL)
14955         return(1);
14956 
14957     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14958         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14959 
14960     buf = xmlAllocParserInputBuffer(enc);
14961     if (buf == NULL)
14962         return(1);
14963 
14964     if (ctxt == NULL) {
14965         xmlFreeParserInputBuffer(buf);
14966         return(1);
14967     }
14968 
14969     xmlCtxtReset(ctxt);
14970 
14971     if (ctxt->pushTab == NULL) {
14972         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14973 	                                    sizeof(xmlChar *));
14974         if (ctxt->pushTab == NULL) {
14975 	    xmlErrMemory(ctxt, NULL);
14976             xmlFreeParserInputBuffer(buf);
14977             return(1);
14978         }
14979     }
14980 
14981     if (filename == NULL) {
14982         ctxt->directory = NULL;
14983     } else {
14984         ctxt->directory = xmlParserGetDirectory(filename);
14985     }
14986 
14987     inputStream = xmlNewInputStream(ctxt);
14988     if (inputStream == NULL) {
14989         xmlFreeParserInputBuffer(buf);
14990         return(1);
14991     }
14992 
14993     if (filename == NULL)
14994         inputStream->filename = NULL;
14995     else
14996         inputStream->filename = (char *)
14997             xmlCanonicPath((const xmlChar *) filename);
14998     inputStream->buf = buf;
14999     xmlBufResetInput(buf->buffer, inputStream);
15000 
15001     inputPush(ctxt, inputStream);
15002 
15003     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15004         (ctxt->input->buf != NULL)) {
15005 	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15006         size_t cur = ctxt->input->cur - ctxt->input->base;
15007 
15008         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15009 
15010         xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15011 #ifdef DEBUG_PUSH
15012         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15013 #endif
15014     }
15015 
15016     if (encoding != NULL) {
15017         xmlCharEncodingHandlerPtr hdlr;
15018 
15019         if (ctxt->encoding != NULL)
15020 	    xmlFree((xmlChar *) ctxt->encoding);
15021         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15022 
15023         hdlr = xmlFindCharEncodingHandler(encoding);
15024         if (hdlr != NULL) {
15025             xmlSwitchToEncoding(ctxt, hdlr);
15026 	} else {
15027 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15028 			      "Unsupported encoding %s\n", BAD_CAST encoding);
15029         }
15030     } else if (enc != XML_CHAR_ENCODING_NONE) {
15031         xmlSwitchEncoding(ctxt, enc);
15032     }
15033 
15034     return(0);
15035 }
15036 
15037 
15038 /**
15039  * xmlCtxtUseOptionsInternal:
15040  * @ctxt: an XML parser context
15041  * @options:  a combination of xmlParserOption
15042  * @encoding:  the user provided encoding to use
15043  *
15044  * Applies the options to the parser context
15045  *
15046  * Returns 0 in case of success, the set of unknown or unimplemented options
15047  *         in case of error.
15048  */
15049 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15050 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15051 {
15052     if (ctxt == NULL)
15053         return(-1);
15054     if (encoding != NULL) {
15055         if (ctxt->encoding != NULL)
15056 	    xmlFree((xmlChar *) ctxt->encoding);
15057         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15058     }
15059     if (options & XML_PARSE_RECOVER) {
15060         ctxt->recovery = 1;
15061         options -= XML_PARSE_RECOVER;
15062 	ctxt->options |= XML_PARSE_RECOVER;
15063     } else
15064         ctxt->recovery = 0;
15065     if (options & XML_PARSE_DTDLOAD) {
15066         ctxt->loadsubset = XML_DETECT_IDS;
15067         options -= XML_PARSE_DTDLOAD;
15068 	ctxt->options |= XML_PARSE_DTDLOAD;
15069     } else
15070         ctxt->loadsubset = 0;
15071     if (options & XML_PARSE_DTDATTR) {
15072         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15073         options -= XML_PARSE_DTDATTR;
15074 	ctxt->options |= XML_PARSE_DTDATTR;
15075     }
15076     if (options & XML_PARSE_NOENT) {
15077         ctxt->replaceEntities = 1;
15078         /* ctxt->loadsubset |= XML_DETECT_IDS; */
15079         options -= XML_PARSE_NOENT;
15080 	ctxt->options |= XML_PARSE_NOENT;
15081     } else
15082         ctxt->replaceEntities = 0;
15083     if (options & XML_PARSE_PEDANTIC) {
15084         ctxt->pedantic = 1;
15085         options -= XML_PARSE_PEDANTIC;
15086 	ctxt->options |= XML_PARSE_PEDANTIC;
15087     } else
15088         ctxt->pedantic = 0;
15089     if (options & XML_PARSE_NOBLANKS) {
15090         ctxt->keepBlanks = 0;
15091         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15092         options -= XML_PARSE_NOBLANKS;
15093 	ctxt->options |= XML_PARSE_NOBLANKS;
15094     } else
15095         ctxt->keepBlanks = 1;
15096     if (options & XML_PARSE_DTDVALID) {
15097         ctxt->validate = 1;
15098         if (options & XML_PARSE_NOWARNING)
15099             ctxt->vctxt.warning = NULL;
15100         if (options & XML_PARSE_NOERROR)
15101             ctxt->vctxt.error = NULL;
15102         options -= XML_PARSE_DTDVALID;
15103 	ctxt->options |= XML_PARSE_DTDVALID;
15104     } else
15105         ctxt->validate = 0;
15106     if (options & XML_PARSE_NOWARNING) {
15107         ctxt->sax->warning = NULL;
15108         options -= XML_PARSE_NOWARNING;
15109     }
15110     if (options & XML_PARSE_NOERROR) {
15111         ctxt->sax->error = NULL;
15112         ctxt->sax->fatalError = NULL;
15113         options -= XML_PARSE_NOERROR;
15114     }
15115 #ifdef LIBXML_SAX1_ENABLED
15116     if (options & XML_PARSE_SAX1) {
15117         ctxt->sax->startElement = xmlSAX2StartElement;
15118         ctxt->sax->endElement = xmlSAX2EndElement;
15119         ctxt->sax->startElementNs = NULL;
15120         ctxt->sax->endElementNs = NULL;
15121         ctxt->sax->initialized = 1;
15122         options -= XML_PARSE_SAX1;
15123 	ctxt->options |= XML_PARSE_SAX1;
15124     }
15125 #endif /* LIBXML_SAX1_ENABLED */
15126     if (options & XML_PARSE_NODICT) {
15127         ctxt->dictNames = 0;
15128         options -= XML_PARSE_NODICT;
15129 	ctxt->options |= XML_PARSE_NODICT;
15130     } else {
15131         ctxt->dictNames = 1;
15132     }
15133     if (options & XML_PARSE_NOCDATA) {
15134         ctxt->sax->cdataBlock = NULL;
15135         options -= XML_PARSE_NOCDATA;
15136 	ctxt->options |= XML_PARSE_NOCDATA;
15137     }
15138     if (options & XML_PARSE_NSCLEAN) {
15139 	ctxt->options |= XML_PARSE_NSCLEAN;
15140         options -= XML_PARSE_NSCLEAN;
15141     }
15142     if (options & XML_PARSE_NONET) {
15143 	ctxt->options |= XML_PARSE_NONET;
15144         options -= XML_PARSE_NONET;
15145     }
15146     if (options & XML_PARSE_COMPACT) {
15147 	ctxt->options |= XML_PARSE_COMPACT;
15148         options -= XML_PARSE_COMPACT;
15149     }
15150     if (options & XML_PARSE_OLD10) {
15151 	ctxt->options |= XML_PARSE_OLD10;
15152         options -= XML_PARSE_OLD10;
15153     }
15154     if (options & XML_PARSE_NOBASEFIX) {
15155 	ctxt->options |= XML_PARSE_NOBASEFIX;
15156         options -= XML_PARSE_NOBASEFIX;
15157     }
15158     if (options & XML_PARSE_HUGE) {
15159 	ctxt->options |= XML_PARSE_HUGE;
15160         options -= XML_PARSE_HUGE;
15161         if (ctxt->dict != NULL)
15162             xmlDictSetLimit(ctxt->dict, 0);
15163     }
15164     if (options & XML_PARSE_OLDSAX) {
15165 	ctxt->options |= XML_PARSE_OLDSAX;
15166         options -= XML_PARSE_OLDSAX;
15167     }
15168     if (options & XML_PARSE_IGNORE_ENC) {
15169 	ctxt->options |= XML_PARSE_IGNORE_ENC;
15170         options -= XML_PARSE_IGNORE_ENC;
15171     }
15172     if (options & XML_PARSE_BIG_LINES) {
15173 	ctxt->options |= XML_PARSE_BIG_LINES;
15174         options -= XML_PARSE_BIG_LINES;
15175     }
15176     ctxt->linenumbers = 1;
15177     return (options);
15178 }
15179 
15180 /**
15181  * xmlCtxtUseOptions:
15182  * @ctxt: an XML parser context
15183  * @options:  a combination of xmlParserOption
15184  *
15185  * Applies the options to the parser context
15186  *
15187  * Returns 0 in case of success, the set of unknown or unimplemented options
15188  *         in case of error.
15189  */
15190 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15191 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15192 {
15193    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15194 }
15195 
15196 /**
15197  * xmlDoRead:
15198  * @ctxt:  an XML parser context
15199  * @URL:  the base URL to use for the document
15200  * @encoding:  the document encoding, or NULL
15201  * @options:  a combination of xmlParserOption
15202  * @reuse:  keep the context for reuse
15203  *
15204  * Common front-end for the xmlRead functions
15205  *
15206  * Returns the resulting document tree or NULL
15207  */
15208 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15209 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15210           int options, int reuse)
15211 {
15212     xmlDocPtr ret;
15213 
15214     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15215     if (encoding != NULL) {
15216         xmlCharEncodingHandlerPtr hdlr;
15217 
15218 	hdlr = xmlFindCharEncodingHandler(encoding);
15219 	if (hdlr != NULL)
15220 	    xmlSwitchToEncoding(ctxt, hdlr);
15221     }
15222     if ((URL != NULL) && (ctxt->input != NULL) &&
15223         (ctxt->input->filename == NULL))
15224         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15225     xmlParseDocument(ctxt);
15226     if ((ctxt->wellFormed) || ctxt->recovery)
15227         ret = ctxt->myDoc;
15228     else {
15229         ret = NULL;
15230 	if (ctxt->myDoc != NULL) {
15231 	    xmlFreeDoc(ctxt->myDoc);
15232 	}
15233     }
15234     ctxt->myDoc = NULL;
15235     if (!reuse) {
15236 	xmlFreeParserCtxt(ctxt);
15237     }
15238 
15239     return (ret);
15240 }
15241 
15242 /**
15243  * xmlReadDoc:
15244  * @cur:  a pointer to a zero terminated string
15245  * @URL:  the base URL to use for the document
15246  * @encoding:  the document encoding, or NULL
15247  * @options:  a combination of xmlParserOption
15248  *
15249  * parse an XML in-memory document and build a tree.
15250  *
15251  * Returns the resulting document tree
15252  */
15253 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15254 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15255 {
15256     xmlParserCtxtPtr ctxt;
15257 
15258     if (cur == NULL)
15259         return (NULL);
15260     xmlInitParser();
15261 
15262     ctxt = xmlCreateDocParserCtxt(cur);
15263     if (ctxt == NULL)
15264         return (NULL);
15265     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15266 }
15267 
15268 /**
15269  * xmlReadFile:
15270  * @filename:  a file or URL
15271  * @encoding:  the document encoding, or NULL
15272  * @options:  a combination of xmlParserOption
15273  *
15274  * parse an XML file from the filesystem or the network.
15275  *
15276  * Returns the resulting document tree
15277  */
15278 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15279 xmlReadFile(const char *filename, const char *encoding, int options)
15280 {
15281     xmlParserCtxtPtr ctxt;
15282 
15283     xmlInitParser();
15284     ctxt = xmlCreateURLParserCtxt(filename, options);
15285     if (ctxt == NULL)
15286         return (NULL);
15287     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15288 }
15289 
15290 /**
15291  * xmlReadMemory:
15292  * @buffer:  a pointer to a char array
15293  * @size:  the size of the array
15294  * @URL:  the base URL to use for the document
15295  * @encoding:  the document encoding, or NULL
15296  * @options:  a combination of xmlParserOption
15297  *
15298  * parse an XML in-memory document and build a tree.
15299  *
15300  * Returns the resulting document tree
15301  */
15302 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15303 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15304 {
15305     xmlParserCtxtPtr ctxt;
15306 
15307     xmlInitParser();
15308     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15309     if (ctxt == NULL)
15310         return (NULL);
15311     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15312 }
15313 
15314 /**
15315  * xmlReadFd:
15316  * @fd:  an open file descriptor
15317  * @URL:  the base URL to use for the document
15318  * @encoding:  the document encoding, or NULL
15319  * @options:  a combination of xmlParserOption
15320  *
15321  * parse an XML from a file descriptor and build a tree.
15322  * NOTE that the file descriptor will not be closed when the
15323  *      reader is closed or reset.
15324  *
15325  * Returns the resulting document tree
15326  */
15327 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15328 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15329 {
15330     xmlParserCtxtPtr ctxt;
15331     xmlParserInputBufferPtr input;
15332     xmlParserInputPtr stream;
15333 
15334     if (fd < 0)
15335         return (NULL);
15336     xmlInitParser();
15337 
15338     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15339     if (input == NULL)
15340         return (NULL);
15341     input->closecallback = NULL;
15342     ctxt = xmlNewParserCtxt();
15343     if (ctxt == NULL) {
15344         xmlFreeParserInputBuffer(input);
15345         return (NULL);
15346     }
15347     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15348     if (stream == NULL) {
15349         xmlFreeParserInputBuffer(input);
15350 	xmlFreeParserCtxt(ctxt);
15351         return (NULL);
15352     }
15353     inputPush(ctxt, stream);
15354     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15355 }
15356 
15357 /**
15358  * xmlReadIO:
15359  * @ioread:  an I/O read function
15360  * @ioclose:  an I/O close function
15361  * @ioctx:  an I/O handler
15362  * @URL:  the base URL to use for the document
15363  * @encoding:  the document encoding, or NULL
15364  * @options:  a combination of xmlParserOption
15365  *
15366  * parse an XML document from I/O functions and source and build a tree.
15367  *
15368  * Returns the resulting document tree
15369  */
15370 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15371 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15372           void *ioctx, const char *URL, const char *encoding, int options)
15373 {
15374     xmlParserCtxtPtr ctxt;
15375     xmlParserInputBufferPtr input;
15376     xmlParserInputPtr stream;
15377 
15378     if (ioread == NULL)
15379         return (NULL);
15380     xmlInitParser();
15381 
15382     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15383                                          XML_CHAR_ENCODING_NONE);
15384     if (input == NULL) {
15385         if (ioclose != NULL)
15386             ioclose(ioctx);
15387         return (NULL);
15388     }
15389     ctxt = xmlNewParserCtxt();
15390     if (ctxt == NULL) {
15391         xmlFreeParserInputBuffer(input);
15392         return (NULL);
15393     }
15394     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15395     if (stream == NULL) {
15396         xmlFreeParserInputBuffer(input);
15397 	xmlFreeParserCtxt(ctxt);
15398         return (NULL);
15399     }
15400     inputPush(ctxt, stream);
15401     return (xmlDoRead(ctxt, URL, encoding, options, 0));
15402 }
15403 
15404 /**
15405  * xmlCtxtReadDoc:
15406  * @ctxt:  an XML parser context
15407  * @cur:  a pointer to a zero terminated string
15408  * @URL:  the base URL to use for the document
15409  * @encoding:  the document encoding, or NULL
15410  * @options:  a combination of xmlParserOption
15411  *
15412  * parse an XML in-memory document and build a tree.
15413  * This reuses the existing @ctxt parser context
15414  *
15415  * Returns the resulting document tree
15416  */
15417 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15418 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15419                const char *URL, const char *encoding, int options)
15420 {
15421     xmlParserInputPtr stream;
15422 
15423     if (cur == NULL)
15424         return (NULL);
15425     if (ctxt == NULL)
15426         return (NULL);
15427     xmlInitParser();
15428 
15429     xmlCtxtReset(ctxt);
15430 
15431     stream = xmlNewStringInputStream(ctxt, cur);
15432     if (stream == NULL) {
15433         return (NULL);
15434     }
15435     inputPush(ctxt, stream);
15436     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15437 }
15438 
15439 /**
15440  * xmlCtxtReadFile:
15441  * @ctxt:  an XML parser context
15442  * @filename:  a file or URL
15443  * @encoding:  the document encoding, or NULL
15444  * @options:  a combination of xmlParserOption
15445  *
15446  * parse an XML file from the filesystem or the network.
15447  * This reuses the existing @ctxt parser context
15448  *
15449  * Returns the resulting document tree
15450  */
15451 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15452 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15453                 const char *encoding, int options)
15454 {
15455     xmlParserInputPtr stream;
15456 
15457     if (filename == NULL)
15458         return (NULL);
15459     if (ctxt == NULL)
15460         return (NULL);
15461     xmlInitParser();
15462 
15463     xmlCtxtReset(ctxt);
15464 
15465     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15466     if (stream == NULL) {
15467         return (NULL);
15468     }
15469     inputPush(ctxt, stream);
15470     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15471 }
15472 
15473 /**
15474  * xmlCtxtReadMemory:
15475  * @ctxt:  an XML parser context
15476  * @buffer:  a pointer to a char array
15477  * @size:  the size of the array
15478  * @URL:  the base URL to use for the document
15479  * @encoding:  the document encoding, or NULL
15480  * @options:  a combination of xmlParserOption
15481  *
15482  * parse an XML in-memory document and build a tree.
15483  * This reuses the existing @ctxt parser context
15484  *
15485  * Returns the resulting document tree
15486  */
15487 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15488 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15489                   const char *URL, const char *encoding, int options)
15490 {
15491     xmlParserInputBufferPtr input;
15492     xmlParserInputPtr stream;
15493 
15494     if (ctxt == NULL)
15495         return (NULL);
15496     if (buffer == NULL)
15497         return (NULL);
15498     xmlInitParser();
15499 
15500     xmlCtxtReset(ctxt);
15501 
15502     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15503     if (input == NULL) {
15504 	return(NULL);
15505     }
15506 
15507     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15508     if (stream == NULL) {
15509 	xmlFreeParserInputBuffer(input);
15510 	return(NULL);
15511     }
15512 
15513     inputPush(ctxt, stream);
15514     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15515 }
15516 
15517 /**
15518  * xmlCtxtReadFd:
15519  * @ctxt:  an XML parser context
15520  * @fd:  an open file descriptor
15521  * @URL:  the base URL to use for the document
15522  * @encoding:  the document encoding, or NULL
15523  * @options:  a combination of xmlParserOption
15524  *
15525  * parse an XML from a file descriptor and build a tree.
15526  * This reuses the existing @ctxt parser context
15527  * NOTE that the file descriptor will not be closed when the
15528  *      reader is closed or reset.
15529  *
15530  * Returns the resulting document tree
15531  */
15532 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15533 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15534               const char *URL, const char *encoding, int options)
15535 {
15536     xmlParserInputBufferPtr input;
15537     xmlParserInputPtr stream;
15538 
15539     if (fd < 0)
15540         return (NULL);
15541     if (ctxt == NULL)
15542         return (NULL);
15543     xmlInitParser();
15544 
15545     xmlCtxtReset(ctxt);
15546 
15547 
15548     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15549     if (input == NULL)
15550         return (NULL);
15551     input->closecallback = NULL;
15552     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15553     if (stream == NULL) {
15554         xmlFreeParserInputBuffer(input);
15555         return (NULL);
15556     }
15557     inputPush(ctxt, stream);
15558     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15559 }
15560 
15561 /**
15562  * xmlCtxtReadIO:
15563  * @ctxt:  an XML parser context
15564  * @ioread:  an I/O read function
15565  * @ioclose:  an I/O close function
15566  * @ioctx:  an I/O handler
15567  * @URL:  the base URL to use for the document
15568  * @encoding:  the document encoding, or NULL
15569  * @options:  a combination of xmlParserOption
15570  *
15571  * parse an XML document from I/O functions and source and build a tree.
15572  * This reuses the existing @ctxt parser context
15573  *
15574  * Returns the resulting document tree
15575  */
15576 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15577 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15578               xmlInputCloseCallback ioclose, void *ioctx,
15579 	      const char *URL,
15580               const char *encoding, int options)
15581 {
15582     xmlParserInputBufferPtr input;
15583     xmlParserInputPtr stream;
15584 
15585     if (ioread == NULL)
15586         return (NULL);
15587     if (ctxt == NULL)
15588         return (NULL);
15589     xmlInitParser();
15590 
15591     xmlCtxtReset(ctxt);
15592 
15593     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15594                                          XML_CHAR_ENCODING_NONE);
15595     if (input == NULL) {
15596         if (ioclose != NULL)
15597             ioclose(ioctx);
15598         return (NULL);
15599     }
15600     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15601     if (stream == NULL) {
15602         xmlFreeParserInputBuffer(input);
15603         return (NULL);
15604     }
15605     inputPush(ctxt, stream);
15606     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15607 }
15608 
15609 #define bottom_parser
15610 #include "elfgcchack.h"
15611