1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82 
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85 
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
89 
90 /************************************************************************
91  *									*
92  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
93  *									*
94  ************************************************************************/
95 
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
98 
99 /*
100  * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
101  *    replacement over the size in byte of the input indicates that you have
102  *    and eponential behaviour. A value of 10 correspond to at least 3 entity
103  *    replacement per byte of input.
104  */
105 #define XML_PARSER_NON_LINEAR 10
106 
107 /*
108  * xmlParserEntityCheck
109  *
110  * Function to check non-linear entity expansion behaviour
111  * This is here to detect and stop exponential linear entity expansion
112  * This is not a limitation of the parser but a safety
113  * boundary feature. It can be disabled with the XML_PARSE_HUGE
114  * parser option.
115  */
116 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118                      xmlEntityPtr ent)
119 {
120     unsigned long consumed = 0;
121 
122     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123         return (0);
124     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125         return (1);
126     if (size != 0) {
127         /*
128          * Do the check based on the replacement size of the entity
129          */
130         if (size < XML_PARSER_BIG_ENTITY)
131 	    return(0);
132 
133         /*
134          * A limit on the amount of text data reasonably used
135          */
136         if (ctxt->input != NULL) {
137             consumed = ctxt->input->consumed +
138                 (ctxt->input->cur - ctxt->input->base);
139         }
140         consumed += ctxt->sizeentities;
141 
142         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144             return (0);
145     } else if (ent != NULL) {
146         /*
147          * use the number of parsed entities in the replacement
148          */
149         size = ent->checked;
150 
151         /*
152          * The amount of data parsed counting entities size only once
153          */
154         if (ctxt->input != NULL) {
155             consumed = ctxt->input->consumed +
156                 (ctxt->input->cur - ctxt->input->base);
157         }
158         consumed += ctxt->sizeentities;
159 
160         /*
161          * Check the density of entities for the amount of data
162 	 * knowing an entity reference will take at least 3 bytes
163          */
164         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165             return (0);
166     } else {
167         /*
168          * strange we got no data for checking just return
169          */
170         return (0);
171     }
172 
173     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174     return (1);
175 }
176 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to process.
 * It is a safety boundary rather than a limitation of the parser, and it
 * can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;

#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
202 
203 
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206                                               const xmlChar **str);
207 
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 	              xmlSAXHandlerPtr sax,
211 		      void *user_data, int depth, const xmlChar *URL,
212 		      const xmlChar *ID, xmlNodePtr *list);
213 
214 static int
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216                           const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
218 static void
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220                       xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
222 
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
226 
227 static int
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229 
230 /************************************************************************
231  *									*
232  * 		Some factorized error routines				*
233  *									*
234  ************************************************************************/
235 
236 /**
237  * xmlErrAttributeDup:
238  * @ctxt:  an XML parser context
239  * @prefix:  the attribute prefix
240  * @localname:  the attribute localname
241  *
242  * Handle a redefinition of attribute error
243  */
244 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246                    const xmlChar * localname)
247 {
248     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249         (ctxt->instate == XML_PARSER_EOF))
250 	return;
251     if (ctxt != NULL)
252 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
253 
254     if (prefix == NULL)
255         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257                         (const char *) localname, NULL, NULL, 0, 0,
258                         "Attribute %s redefined\n", localname);
259     else
260         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262                         (const char *) prefix, (const char *) localname,
263                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264                         localname);
265     if (ctxt != NULL) {
266 	ctxt->wellFormed = 0;
267 	if (ctxt->recovery == 0)
268 	    ctxt->disableSAX = 1;
269     }
270 }
271 
272 /**
273  * xmlFatalErr:
274  * @ctxt:  an XML parser context
275  * @error:  the error number
276  * @extra:  extra information string
277  *
278  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279  */
280 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
282 {
283     const char *errmsg;
284 
285     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286         (ctxt->instate == XML_PARSER_EOF))
287 	return;
288     switch (error) {
289         case XML_ERR_INVALID_HEX_CHARREF:
290             errmsg = "CharRef: invalid hexadecimal value\n";
291             break;
292         case XML_ERR_INVALID_DEC_CHARREF:
293             errmsg = "CharRef: invalid decimal value\n";
294             break;
295         case XML_ERR_INVALID_CHARREF:
296             errmsg = "CharRef: invalid value\n";
297             break;
298         case XML_ERR_INTERNAL_ERROR:
299             errmsg = "internal error";
300             break;
301         case XML_ERR_PEREF_AT_EOF:
302             errmsg = "PEReference at end of document\n";
303             break;
304         case XML_ERR_PEREF_IN_PROLOG:
305             errmsg = "PEReference in prolog\n";
306             break;
307         case XML_ERR_PEREF_IN_EPILOG:
308             errmsg = "PEReference in epilog\n";
309             break;
310         case XML_ERR_PEREF_NO_NAME:
311             errmsg = "PEReference: no name\n";
312             break;
313         case XML_ERR_PEREF_SEMICOL_MISSING:
314             errmsg = "PEReference: expecting ';'\n";
315             break;
316         case XML_ERR_ENTITY_LOOP:
317             errmsg = "Detected an entity reference loop\n";
318             break;
319         case XML_ERR_ENTITY_NOT_STARTED:
320             errmsg = "EntityValue: \" or ' expected\n";
321             break;
322         case XML_ERR_ENTITY_PE_INTERNAL:
323             errmsg = "PEReferences forbidden in internal subset\n";
324             break;
325         case XML_ERR_ENTITY_NOT_FINISHED:
326             errmsg = "EntityValue: \" or ' expected\n";
327             break;
328         case XML_ERR_ATTRIBUTE_NOT_STARTED:
329             errmsg = "AttValue: \" or ' expected\n";
330             break;
331         case XML_ERR_LT_IN_ATTRIBUTE:
332             errmsg = "Unescaped '<' not allowed in attributes values\n";
333             break;
334         case XML_ERR_LITERAL_NOT_STARTED:
335             errmsg = "SystemLiteral \" or ' expected\n";
336             break;
337         case XML_ERR_LITERAL_NOT_FINISHED:
338             errmsg = "Unfinished System or Public ID \" or ' expected\n";
339             break;
340         case XML_ERR_MISPLACED_CDATA_END:
341             errmsg = "Sequence ']]>' not allowed in content\n";
342             break;
343         case XML_ERR_URI_REQUIRED:
344             errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345             break;
346         case XML_ERR_PUBID_REQUIRED:
347             errmsg = "PUBLIC, the Public Identifier is missing\n";
348             break;
349         case XML_ERR_HYPHEN_IN_COMMENT:
350             errmsg = "Comment must not contain '--' (double-hyphen)\n";
351             break;
352         case XML_ERR_PI_NOT_STARTED:
353             errmsg = "xmlParsePI : no target name\n";
354             break;
355         case XML_ERR_RESERVED_XML_NAME:
356             errmsg = "Invalid PI name\n";
357             break;
358         case XML_ERR_NOTATION_NOT_STARTED:
359             errmsg = "NOTATION: Name expected here\n";
360             break;
361         case XML_ERR_NOTATION_NOT_FINISHED:
362             errmsg = "'>' required to close NOTATION declaration\n";
363             break;
364         case XML_ERR_VALUE_REQUIRED:
365             errmsg = "Entity value required\n";
366             break;
367         case XML_ERR_URI_FRAGMENT:
368             errmsg = "Fragment not allowed";
369             break;
370         case XML_ERR_ATTLIST_NOT_STARTED:
371             errmsg = "'(' required to start ATTLIST enumeration\n";
372             break;
373         case XML_ERR_NMTOKEN_REQUIRED:
374             errmsg = "NmToken expected in ATTLIST enumeration\n";
375             break;
376         case XML_ERR_ATTLIST_NOT_FINISHED:
377             errmsg = "')' required to finish ATTLIST enumeration\n";
378             break;
379         case XML_ERR_MIXED_NOT_STARTED:
380             errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381             break;
382         case XML_ERR_PCDATA_REQUIRED:
383             errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384             break;
385         case XML_ERR_ELEMCONTENT_NOT_STARTED:
386             errmsg = "ContentDecl : Name or '(' expected\n";
387             break;
388         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389             errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390             break;
391         case XML_ERR_PEREF_IN_INT_SUBSET:
392             errmsg =
393                 "PEReference: forbidden within markup decl in internal subset\n";
394             break;
395         case XML_ERR_GT_REQUIRED:
396             errmsg = "expected '>'\n";
397             break;
398         case XML_ERR_CONDSEC_INVALID:
399             errmsg = "XML conditional section '[' expected\n";
400             break;
401         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402             errmsg = "Content error in the external subset\n";
403             break;
404         case XML_ERR_CONDSEC_INVALID_KEYWORD:
405             errmsg =
406                 "conditional section INCLUDE or IGNORE keyword expected\n";
407             break;
408         case XML_ERR_CONDSEC_NOT_FINISHED:
409             errmsg = "XML conditional section not closed\n";
410             break;
411         case XML_ERR_XMLDECL_NOT_STARTED:
412             errmsg = "Text declaration '<?xml' required\n";
413             break;
414         case XML_ERR_XMLDECL_NOT_FINISHED:
415             errmsg = "parsing XML declaration: '?>' expected\n";
416             break;
417         case XML_ERR_EXT_ENTITY_STANDALONE:
418             errmsg = "external parsed entities cannot be standalone\n";
419             break;
420         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421             errmsg = "EntityRef: expecting ';'\n";
422             break;
423         case XML_ERR_DOCTYPE_NOT_FINISHED:
424             errmsg = "DOCTYPE improperly terminated\n";
425             break;
426         case XML_ERR_LTSLASH_REQUIRED:
427             errmsg = "EndTag: '</' not found\n";
428             break;
429         case XML_ERR_EQUAL_REQUIRED:
430             errmsg = "expected '='\n";
431             break;
432         case XML_ERR_STRING_NOT_CLOSED:
433             errmsg = "String not closed expecting \" or '\n";
434             break;
435         case XML_ERR_STRING_NOT_STARTED:
436             errmsg = "String not started expecting ' or \"\n";
437             break;
438         case XML_ERR_ENCODING_NAME:
439             errmsg = "Invalid XML encoding name\n";
440             break;
441         case XML_ERR_STANDALONE_VALUE:
442             errmsg = "standalone accepts only 'yes' or 'no'\n";
443             break;
444         case XML_ERR_DOCUMENT_EMPTY:
445             errmsg = "Document is empty\n";
446             break;
447         case XML_ERR_DOCUMENT_END:
448             errmsg = "Extra content at the end of the document\n";
449             break;
450         case XML_ERR_NOT_WELL_BALANCED:
451             errmsg = "chunk is not well balanced\n";
452             break;
453         case XML_ERR_EXTRA_CONTENT:
454             errmsg = "extra content at the end of well balanced chunk\n";
455             break;
456         case XML_ERR_VERSION_MISSING:
457             errmsg = "Malformed declaration expecting version\n";
458             break;
459 #if 0
460         case:
461             errmsg = "\n";
462             break;
463 #endif
464         default:
465             errmsg = "Unregistered error message\n";
466     }
467     if (ctxt != NULL)
468 	ctxt->errNo = error;
469     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471                     info);
472     if (ctxt != NULL) {
473 	ctxt->wellFormed = 0;
474 	if (ctxt->recovery == 0)
475 	    ctxt->disableSAX = 1;
476     }
477 }
478 
479 /**
480  * xmlFatalErrMsg:
481  * @ctxt:  an XML parser context
482  * @error:  the error number
483  * @msg:  the error message
484  *
485  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486  */
487 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489                const char *msg)
490 {
491     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492         (ctxt->instate == XML_PARSER_EOF))
493 	return;
494     if (ctxt != NULL)
495 	ctxt->errNo = error;
496     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498     if (ctxt != NULL) {
499 	ctxt->wellFormed = 0;
500 	if (ctxt->recovery == 0)
501 	    ctxt->disableSAX = 1;
502     }
503 }
504 
505 /**
506  * xmlWarningMsg:
507  * @ctxt:  an XML parser context
508  * @error:  the error number
509  * @msg:  the error message
510  * @str1:  extra data
511  * @str2:  extra data
512  *
513  * Handle a warning.
514  */
515 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517               const char *msg, const xmlChar *str1, const xmlChar *str2)
518 {
519     xmlStructuredErrorFunc schannel = NULL;
520 
521     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522         (ctxt->instate == XML_PARSER_EOF))
523 	return;
524     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525         (ctxt->sax->initialized == XML_SAX2_MAGIC))
526         schannel = ctxt->sax->serror;
527     if (ctxt != NULL) {
528         __xmlRaiseError(schannel,
529                     (ctxt->sax) ? ctxt->sax->warning : NULL,
530                     ctxt->userData,
531                     ctxt, NULL, XML_FROM_PARSER, error,
532                     XML_ERR_WARNING, NULL, 0,
533 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
534 		    msg, (const char *) str1, (const char *) str2);
535     } else {
536         __xmlRaiseError(schannel, NULL, NULL,
537                     ctxt, NULL, XML_FROM_PARSER, error,
538                     XML_ERR_WARNING, NULL, 0,
539 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
540 		    msg, (const char *) str1, (const char *) str2);
541     }
542 }
543 
544 /**
545  * xmlValidityError:
546  * @ctxt:  an XML parser context
547  * @error:  the error number
548  * @msg:  the error message
549  * @str1:  extra data
550  *
551  * Handle a validity error.
552  */
553 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555               const char *msg, const xmlChar *str1, const xmlChar *str2)
556 {
557     xmlStructuredErrorFunc schannel = NULL;
558 
559     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560         (ctxt->instate == XML_PARSER_EOF))
561 	return;
562     if (ctxt != NULL) {
563 	ctxt->errNo = error;
564 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 	    schannel = ctxt->sax->serror;
566     }
567     if (ctxt != NULL) {
568         __xmlRaiseError(schannel,
569                     ctxt->vctxt.error, ctxt->vctxt.userData,
570                     ctxt, NULL, XML_FROM_DTD, error,
571                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 		    (const char *) str2, NULL, 0, 0,
573 		    msg, (const char *) str1, (const char *) str2);
574 	ctxt->valid = 0;
575     } else {
576         __xmlRaiseError(schannel, NULL, NULL,
577                     ctxt, NULL, XML_FROM_DTD, error,
578                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 		    (const char *) str2, NULL, 0, 0,
580 		    msg, (const char *) str1, (const char *) str2);
581     }
582 }
583 
584 /**
585  * xmlFatalErrMsgInt:
586  * @ctxt:  an XML parser context
587  * @error:  the error number
588  * @msg:  the error message
589  * @val:  an integer value
590  *
591  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592  */
593 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595                   const char *msg, int val)
596 {
597     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598         (ctxt->instate == XML_PARSER_EOF))
599 	return;
600     if (ctxt != NULL)
601 	ctxt->errNo = error;
602     __xmlRaiseError(NULL, NULL, NULL,
603                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605     if (ctxt != NULL) {
606 	ctxt->wellFormed = 0;
607 	if (ctxt->recovery == 0)
608 	    ctxt->disableSAX = 1;
609     }
610 }
611 
612 /**
613  * xmlFatalErrMsgStrIntStr:
614  * @ctxt:  an XML parser context
615  * @error:  the error number
616  * @msg:  the error message
617  * @str1:  an string info
618  * @val:  an integer value
619  * @str2:  an string info
620  *
621  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622  */
623 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625                   const char *msg, const xmlChar *str1, int val,
626 		  const xmlChar *str2)
627 {
628     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629         (ctxt->instate == XML_PARSER_EOF))
630 	return;
631     if (ctxt != NULL)
632 	ctxt->errNo = error;
633     __xmlRaiseError(NULL, NULL, NULL,
634                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635                     NULL, 0, (const char *) str1, (const char *) str2,
636 		    NULL, val, 0, msg, str1, val, str2);
637     if (ctxt != NULL) {
638 	ctxt->wellFormed = 0;
639 	if (ctxt->recovery == 0)
640 	    ctxt->disableSAX = 1;
641     }
642 }
643 
644 /**
645  * xmlFatalErrMsgStr:
646  * @ctxt:  an XML parser context
647  * @error:  the error number
648  * @msg:  the error message
649  * @val:  a string value
650  *
651  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652  */
653 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655                   const char *msg, const xmlChar * val)
656 {
657     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658         (ctxt->instate == XML_PARSER_EOF))
659 	return;
660     if (ctxt != NULL)
661 	ctxt->errNo = error;
662     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663                     XML_FROM_PARSER, error, XML_ERR_FATAL,
664                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665                     val);
666     if (ctxt != NULL) {
667 	ctxt->wellFormed = 0;
668 	if (ctxt->recovery == 0)
669 	    ctxt->disableSAX = 1;
670     }
671 }
672 
673 /**
674  * xmlErrMsgStr:
675  * @ctxt:  an XML parser context
676  * @error:  the error number
677  * @msg:  the error message
678  * @val:  a string value
679  *
680  * Handle a non fatal parser error
681  */
682 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684                   const char *msg, const xmlChar * val)
685 {
686     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687         (ctxt->instate == XML_PARSER_EOF))
688 	return;
689     if (ctxt != NULL)
690 	ctxt->errNo = error;
691     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692                     XML_FROM_PARSER, error, XML_ERR_ERROR,
693                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694                     val);
695 }
696 
697 /**
698  * xmlNsErr:
699  * @ctxt:  an XML parser context
700  * @error:  the error number
701  * @msg:  the message
702  * @info1:  extra information string
703  * @info2:  extra information string
704  *
705  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706  */
707 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709          const char *msg,
710          const xmlChar * info1, const xmlChar * info2,
711          const xmlChar * info3)
712 {
713     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714         (ctxt->instate == XML_PARSER_EOF))
715 	return;
716     if (ctxt != NULL)
717 	ctxt->errNo = error;
718     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
720                     (const char *) info2, (const char *) info3, 0, 0, msg,
721                     info1, info2, info3);
722     if (ctxt != NULL)
723 	ctxt->nsWellFormed = 0;
724 }
725 
726 /**
727  * xmlNsWarn
728  * @ctxt:  an XML parser context
729  * @error:  the error number
730  * @msg:  the message
731  * @info1:  extra information string
732  * @info2:  extra information string
733  *
734  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735  */
736 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738          const char *msg,
739          const xmlChar * info1, const xmlChar * info2,
740          const xmlChar * info3)
741 {
742     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743         (ctxt->instate == XML_PARSER_EOF))
744 	return;
745     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
747                     (const char *) info2, (const char *) info3, 0, 0, msg,
748                     info1, info2, info3);
749 }
750 
751 /************************************************************************
752  *									*
753  * 		Library wide options					*
754  *									*
755  ************************************************************************/
756 
757 /**
758   * xmlHasFeature:
759   * @feature: the feature to be examined
760   *
761   * Examines if the library has been compiled with a given feature.
762   *
763   * Returns a non-zero value if the feature exist, otherwise zero.
764   * Returns zero (0) if the feature does not exist or an unknown
765   * unknown feature is requested, non-zero otherwise.
766   */
767 int
xmlHasFeature(xmlFeature feature)768 xmlHasFeature(xmlFeature feature)
769 {
770     switch (feature) {
771 	case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
773 	    return(1);
774 #else
775 	    return(0);
776 #endif
777         case XML_WITH_TREE:
778 #ifdef LIBXML_TREE_ENABLED
779             return(1);
780 #else
781             return(0);
782 #endif
783         case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
785             return(1);
786 #else
787             return(0);
788 #endif
789         case XML_WITH_PUSH:
790 #ifdef LIBXML_PUSH_ENABLED
791             return(1);
792 #else
793             return(0);
794 #endif
795         case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
797             return(1);
798 #else
799             return(0);
800 #endif
801         case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
803             return(1);
804 #else
805             return(0);
806 #endif
807         case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
809             return(1);
810 #else
811             return(0);
812 #endif
813         case XML_WITH_SAX1:
814 #ifdef LIBXML_SAX1_ENABLED
815             return(1);
816 #else
817             return(0);
818 #endif
819         case XML_WITH_FTP:
820 #ifdef LIBXML_FTP_ENABLED
821             return(1);
822 #else
823             return(0);
824 #endif
825         case XML_WITH_HTTP:
826 #ifdef LIBXML_HTTP_ENABLED
827             return(1);
828 #else
829             return(0);
830 #endif
831         case XML_WITH_VALID:
832 #ifdef LIBXML_VALID_ENABLED
833             return(1);
834 #else
835             return(0);
836 #endif
837         case XML_WITH_HTML:
838 #ifdef LIBXML_HTML_ENABLED
839             return(1);
840 #else
841             return(0);
842 #endif
843         case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
845             return(1);
846 #else
847             return(0);
848 #endif
849         case XML_WITH_C14N:
850 #ifdef LIBXML_C14N_ENABLED
851             return(1);
852 #else
853             return(0);
854 #endif
855         case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
857             return(1);
858 #else
859             return(0);
860 #endif
861         case XML_WITH_XPATH:
862 #ifdef LIBXML_XPATH_ENABLED
863             return(1);
864 #else
865             return(0);
866 #endif
867         case XML_WITH_XPTR:
868 #ifdef LIBXML_XPTR_ENABLED
869             return(1);
870 #else
871             return(0);
872 #endif
873         case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
875             return(1);
876 #else
877             return(0);
878 #endif
879         case XML_WITH_ICONV:
880 #ifdef LIBXML_ICONV_ENABLED
881             return(1);
882 #else
883             return(0);
884 #endif
885         case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
887             return(1);
888 #else
889             return(0);
890 #endif
891         case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
893             return(1);
894 #else
895             return(0);
896 #endif
897         case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
899             return(1);
900 #else
901             return(0);
902 #endif
903         case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
905             return(1);
906 #else
907             return(0);
908 #endif
909         case XML_WITH_EXPR:
910 #ifdef LIBXML_EXPR_ENABLED
911             return(1);
912 #else
913             return(0);
914 #endif
915         case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
917             return(1);
918 #else
919             return(0);
920 #endif
921         case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
923             return(1);
924 #else
925             return(0);
926 #endif
927         case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
929             return(1);
930 #else
931             return(0);
932 #endif
933         case XML_WITH_DEBUG:
934 #ifdef LIBXML_DEBUG_ENABLED
935             return(1);
936 #else
937             return(0);
938 #endif
939         case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
941             return(1);
942 #else
943             return(0);
944 #endif
945         case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
947             return(1);
948 #else
949             return(0);
950 #endif
951         case XML_WITH_ZLIB:
952 #ifdef LIBXML_ZLIB_ENABLED
953             return(1);
954 #else
955             return(0);
956 #endif
957         case XML_WITH_ICU:
958 #ifdef LIBXML_ICU_ENABLED
959             return(1);
960 #else
961             return(0);
962 #endif
963         default:
964 	    break;
965      }
966      return(0);
967 }
968 
969 /************************************************************************
970  *									*
971  * 		SAX2 defaulted attributes handling			*
972  *									*
973  ************************************************************************/
974 
975 /**
976  * xmlDetectSAX2:
977  * @ctxt:  an XML parser context
978  *
979  * Do the SAX2 detection and specific intialization
980  */
981 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983     if (ctxt == NULL) return;
984 #ifdef LIBXML_SAX1_ENABLED
985     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986         ((ctxt->sax->startElementNs != NULL) ||
987          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
988 #else
989     ctxt->sax2 = 1;
990 #endif /* LIBXML_SAX1_ENABLED */
991 
992     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996     		(ctxt->str_xml_ns == NULL)) {
997         xmlErrMemory(ctxt, NULL);
998     }
999 }
1000 
1001 typedef struct _xmlDefAttrs xmlDefAttrs;
1002 typedef xmlDefAttrs *xmlDefAttrsPtr;
1003 struct _xmlDefAttrs {
1004     int nbAttrs;	/* number of defaulted attributes on that element */
1005     int maxAttrs;       /* the size of the array */
1006     const xmlChar *values[5]; /* array of localname/prefix/values/external */
1007 };
1008 
1009 /**
1010  * xmlAttrNormalizeSpace:
1011  * @src: the source string
1012  * @dst: the target string
1013  *
1014  * Normalize the space in non CDATA attribute values:
1015  * If the attribute type is not CDATA, then the XML processor MUST further
1016  * process the normalized attribute value by discarding any leading and
1017  * trailing space (#x20) characters, and by replacing sequences of space
1018  * (#x20) characters by a single space (#x20) character.
1019  * Note that the size of dst need to be at least src, and if one doesn't need
1020  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021  * passing src as dst is just fine.
1022  *
1023  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024  *         is needed.
1025  */
1026 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028 {
1029     if ((src == NULL) || (dst == NULL))
1030         return(NULL);
1031 
1032     while (*src == 0x20) src++;
1033     while (*src != 0) {
1034 	if (*src == 0x20) {
1035 	    while (*src == 0x20) src++;
1036 	    if (*src != 0)
1037 		*dst++ = 0x20;
1038 	} else {
1039 	    *dst++ = *src++;
1040 	}
1041     }
1042     *dst = 0;
1043     if (dst == src)
1044        return(NULL);
1045     return(dst);
1046 }
1047 
1048 /**
1049  * xmlAttrNormalizeSpace2:
1050  * @src: the source string
1051  *
1052  * Normalize the space in non CDATA attribute values, a slightly more complex
1053  * front end to avoid allocation problems when running on attribute values
1054  * coming from the input.
1055  *
1056  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057  *         is needed.
1058  */
1059 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1061 {
1062     int i;
1063     int remove_head = 0;
1064     int need_realloc = 0;
1065     const xmlChar *cur;
1066 
1067     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068         return(NULL);
1069     i = *len;
1070     if (i <= 0)
1071         return(NULL);
1072 
1073     cur = src;
1074     while (*cur == 0x20) {
1075         cur++;
1076 	remove_head++;
1077     }
1078     while (*cur != 0) {
1079 	if (*cur == 0x20) {
1080 	    cur++;
1081 	    if ((*cur == 0x20) || (*cur == 0)) {
1082 	        need_realloc = 1;
1083 		break;
1084 	    }
1085 	} else
1086 	    cur++;
1087     }
1088     if (need_realloc) {
1089         xmlChar *ret;
1090 
1091 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092 	if (ret == NULL) {
1093 	    xmlErrMemory(ctxt, NULL);
1094 	    return(NULL);
1095 	}
1096 	xmlAttrNormalizeSpace(ret, ret);
1097 	*len = (int) strlen((const char *)ret);
1098         return(ret);
1099     } else if (remove_head) {
1100         *len -= remove_head;
1101         memmove(src, src + remove_head, 1 + *len);
1102 	return(src);
1103     }
1104     return(NULL);
1105 }
1106 
1107 /**
1108  * xmlAddDefAttrs:
1109  * @ctxt:  an XML parser context
1110  * @fullname:  the element fullname
1111  * @fullattr:  the attribute fullname
1112  * @value:  the attribute value
1113  *
1114  * Add a defaulted attribute for an element
1115  */
1116 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118                const xmlChar *fullname,
1119                const xmlChar *fullattr,
1120                const xmlChar *value) {
1121     xmlDefAttrsPtr defaults;
1122     int len;
1123     const xmlChar *name;
1124     const xmlChar *prefix;
1125 
1126     /*
1127      * Allows to detect attribute redefinitions
1128      */
1129     if (ctxt->attsSpecial != NULL) {
1130         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131 	    return;
1132     }
1133 
1134     if (ctxt->attsDefault == NULL) {
1135         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136 	if (ctxt->attsDefault == NULL)
1137 	    goto mem_error;
1138     }
1139 
1140     /*
1141      * split the element name into prefix:localname , the string found
1142      * are within the DTD and then not associated to namespace names.
1143      */
1144     name = xmlSplitQName3(fullname, &len);
1145     if (name == NULL) {
1146         name = xmlDictLookup(ctxt->dict, fullname, -1);
1147 	prefix = NULL;
1148     } else {
1149         name = xmlDictLookup(ctxt->dict, name, -1);
1150 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151     }
1152 
1153     /*
1154      * make sure there is some storage
1155      */
1156     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157     if (defaults == NULL) {
1158         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159 	                   (4 * 5) * sizeof(const xmlChar *));
1160 	if (defaults == NULL)
1161 	    goto mem_error;
1162 	defaults->nbAttrs = 0;
1163 	defaults->maxAttrs = 4;
1164 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 	                        defaults, NULL) < 0) {
1166 	    xmlFree(defaults);
1167 	    goto mem_error;
1168 	}
1169     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170         xmlDefAttrsPtr temp;
1171 
1172         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174 	if (temp == NULL)
1175 	    goto mem_error;
1176 	defaults = temp;
1177 	defaults->maxAttrs *= 2;
1178 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 	                        defaults, NULL) < 0) {
1180 	    xmlFree(defaults);
1181 	    goto mem_error;
1182 	}
1183     }
1184 
1185     /*
1186      * Split the element name into prefix:localname , the string found
1187      * are within the DTD and hen not associated to namespace names.
1188      */
1189     name = xmlSplitQName3(fullattr, &len);
1190     if (name == NULL) {
1191         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192 	prefix = NULL;
1193     } else {
1194         name = xmlDictLookup(ctxt->dict, name, -1);
1195 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196     }
1197 
1198     defaults->values[5 * defaults->nbAttrs] = name;
1199     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200     /* intern the string and precompute the end */
1201     len = xmlStrlen(value);
1202     value = xmlDictLookup(ctxt->dict, value, len);
1203     defaults->values[5 * defaults->nbAttrs + 2] = value;
1204     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205     if (ctxt->external)
1206         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207     else
1208         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209     defaults->nbAttrs++;
1210 
1211     return;
1212 
1213 mem_error:
1214     xmlErrMemory(ctxt, NULL);
1215     return;
1216 }
1217 
1218 /**
1219  * xmlAddSpecialAttr:
1220  * @ctxt:  an XML parser context
1221  * @fullname:  the element fullname
1222  * @fullattr:  the attribute fullname
1223  * @type:  the attribute type
1224  *
1225  * Register this attribute type
1226  */
1227 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 		  const xmlChar *fullname,
1230 		  const xmlChar *fullattr,
1231 		  int type)
1232 {
1233     if (ctxt->attsSpecial == NULL) {
1234         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235 	if (ctxt->attsSpecial == NULL)
1236 	    goto mem_error;
1237     }
1238 
1239     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240         return;
1241 
1242     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243                      (void *) (long) type);
1244     return;
1245 
1246 mem_error:
1247     xmlErrMemory(ctxt, NULL);
1248     return;
1249 }
1250 
1251 /**
1252  * xmlCleanSpecialAttrCallback:
1253  *
1254  * Removes CDATA attributes from the special attribute table
1255  */
1256 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258                             const xmlChar *fullname, const xmlChar *fullattr,
1259                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1260     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261 
1262     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264     }
1265 }
1266 
1267 /**
1268  * xmlCleanSpecialAttr:
1269  * @ctxt:  an XML parser context
1270  *
1271  * Trim the list of attributes defined to remove all those of type
1272  * CDATA as they are not special. This call should be done when finishing
1273  * to parse the DTD and before starting to parse the document root.
1274  */
1275 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277 {
1278     if (ctxt->attsSpecial == NULL)
1279         return;
1280 
1281     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282 
1283     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284         xmlHashFree(ctxt->attsSpecial, NULL);
1285         ctxt->attsSpecial = NULL;
1286     }
1287     return;
1288 }
1289 
1290 /**
1291  * xmlCheckLanguageID:
1292  * @lang:  pointer to the string value
1293  *
1294  * Checks that the value conforms to the LanguageID production:
1295  *
1296  * NOTE: this is somewhat deprecated, those productions were removed from
1297  *       the XML Second edition.
1298  *
1299  * [33] LanguageID ::= Langcode ('-' Subcode)*
1300  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1301  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304  * [38] Subcode ::= ([a-z] | [A-Z])+
1305  *
1306  * The current REC reference the sucessors of RFC 1766, currently 5646
1307  *
1308  * http://www.rfc-editor.org/rfc/rfc5646.txt
1309  * langtag       = language
1310  *                 ["-" script]
1311  *                 ["-" region]
1312  *                 *("-" variant)
1313  *                 *("-" extension)
1314  *                 ["-" privateuse]
1315  * language      = 2*3ALPHA            ; shortest ISO 639 code
1316  *                 ["-" extlang]       ; sometimes followed by
1317  *                                     ; extended language subtags
1318  *               / 4ALPHA              ; or reserved for future use
1319  *               / 5*8ALPHA            ; or registered language subtag
1320  *
1321  * extlang       = 3ALPHA              ; selected ISO 639 codes
1322  *                 *2("-" 3ALPHA)      ; permanently reserved
1323  *
1324  * script        = 4ALPHA              ; ISO 15924 code
1325  *
1326  * region        = 2ALPHA              ; ISO 3166-1 code
1327  *               / 3DIGIT              ; UN M.49 code
1328  *
1329  * variant       = 5*8alphanum         ; registered variants
1330  *               / (DIGIT 3alphanum)
1331  *
1332  * extension     = singleton 1*("-" (2*8alphanum))
1333  *
1334  *                                     ; Single alphanumerics
1335  *                                     ; "x" reserved for private use
1336  * singleton     = DIGIT               ; 0 - 9
1337  *               / %x41-57             ; A - W
1338  *               / %x59-5A             ; Y - Z
1339  *               / %x61-77             ; a - w
1340  *               / %x79-7A             ; y - z
1341  *
1342  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343  * The parser below doesn't try to cope with extension or privateuse
1344  * that could be added but that's not interoperable anyway
1345  *
1346  * Returns 1 if correct 0 otherwise
1347  **/
1348 int
xmlCheckLanguageID(const xmlChar * lang)1349 xmlCheckLanguageID(const xmlChar * lang)
1350 {
1351     const xmlChar *cur = lang, *nxt;
1352 
1353     if (cur == NULL)
1354         return (0);
1355     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1356         ((cur[0] == 'I') && (cur[1] == '-')) ||
1357         ((cur[0] == 'x') && (cur[1] == '-')) ||
1358         ((cur[0] == 'X') && (cur[1] == '-'))) {
1359         /*
1360          * Still allow IANA code and user code which were coming
1361          * from the previous version of the XML-1.0 specification
1362          * it's deprecated but we should not fail
1363          */
1364         cur += 2;
1365         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1366                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1367             cur++;
1368         return(cur[0] == 0);
1369     }
1370     nxt = cur;
1371     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373            nxt++;
1374     if (nxt - cur >= 4) {
1375         /*
1376          * Reserved
1377          */
1378         if ((nxt - cur > 8) || (nxt[0] != 0))
1379             return(0);
1380         return(1);
1381     }
1382     if (nxt - cur < 2)
1383         return(0);
1384     /* we got an ISO 639 code */
1385     if (nxt[0] == 0)
1386         return(1);
1387     if (nxt[0] != '-')
1388         return(0);
1389 
1390     nxt++;
1391     cur = nxt;
1392     /* now we can have extlang or script or region or variant */
1393     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1394         goto region_m49;
1395 
1396     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398            nxt++;
1399     if (nxt - cur == 4)
1400         goto script;
1401     if (nxt - cur == 2)
1402         goto region;
1403     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1404         goto variant;
1405     if (nxt - cur != 3)
1406         return(0);
1407     /* we parsed an extlang */
1408     if (nxt[0] == 0)
1409         return(1);
1410     if (nxt[0] != '-')
1411         return(0);
1412 
1413     nxt++;
1414     cur = nxt;
1415     /* now we can have script or region or variant */
1416     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1417         goto region_m49;
1418 
1419     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1421            nxt++;
1422     if (nxt - cur == 2)
1423         goto region;
1424     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1425         goto variant;
1426     if (nxt - cur != 4)
1427         return(0);
1428     /* we parsed a script */
1429 script:
1430     if (nxt[0] == 0)
1431         return(1);
1432     if (nxt[0] != '-')
1433         return(0);
1434 
1435     nxt++;
1436     cur = nxt;
1437     /* now we can have region or variant */
1438     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1439         goto region_m49;
1440 
1441     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1443            nxt++;
1444 
1445     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1446         goto variant;
1447     if (nxt - cur != 2)
1448         return(0);
1449     /* we parsed a region */
1450 region:
1451     if (nxt[0] == 0)
1452         return(1);
1453     if (nxt[0] != '-')
1454         return(0);
1455 
1456     nxt++;
1457     cur = nxt;
1458     /* now we can just have a variant */
1459     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1461            nxt++;
1462 
1463     if ((nxt - cur < 5) || (nxt - cur > 8))
1464         return(0);
1465 
1466     /* we parsed a variant */
1467 variant:
1468     if (nxt[0] == 0)
1469         return(1);
1470     if (nxt[0] != '-')
1471         return(0);
1472     /* extensions and private use subtags not checked */
1473     return (1);
1474 
1475 region_m49:
1476     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1478         nxt += 3;
1479         goto region;
1480     }
1481     return(0);
1482 }
1483 
1484 /************************************************************************
1485  *									*
1486  *		Parser stacks related functions and macros		*
1487  *									*
1488  ************************************************************************/
1489 
/*
 * Forward declaration only: the function has static linkage, so its
 * definition has to be provided elsewhere in this translation unit.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1492 
1493 #ifdef SAX2
1494 /**
1495  * nsPush:
1496  * @ctxt:  an XML parser context
1497  * @prefix:  the namespace prefix or NULL
1498  * @URL:  the namespace name
1499  *
1500  * Pushes a new parser namespace on top of the ns stack
1501  *
1502  * Returns -1 in case of error, -2 if the namespace should be discarded
1503  *	   and the index in the stack otherwise.
1504  */
1505 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1506 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1507 {
1508     if (ctxt->options & XML_PARSE_NSCLEAN) {
1509         int i;
1510 	for (i = 0;i < ctxt->nsNr;i += 2) {
1511 	    if (ctxt->nsTab[i] == prefix) {
1512 		/* in scope */
1513 	        if (ctxt->nsTab[i + 1] == URL)
1514 		    return(-2);
1515 		/* out of scope keep it */
1516 		break;
1517 	    }
1518 	}
1519     }
1520     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1521 	ctxt->nsMax = 10;
1522 	ctxt->nsNr = 0;
1523 	ctxt->nsTab = (const xmlChar **)
1524 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525 	if (ctxt->nsTab == NULL) {
1526 	    xmlErrMemory(ctxt, NULL);
1527 	    ctxt->nsMax = 0;
1528             return (-1);
1529 	}
1530     } else if (ctxt->nsNr >= ctxt->nsMax) {
1531         const xmlChar ** tmp;
1532         ctxt->nsMax *= 2;
1533         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1535         if (tmp == NULL) {
1536             xmlErrMemory(ctxt, NULL);
1537 	    ctxt->nsMax /= 2;
1538             return (-1);
1539         }
1540 	ctxt->nsTab = tmp;
1541     }
1542     ctxt->nsTab[ctxt->nsNr++] = prefix;
1543     ctxt->nsTab[ctxt->nsNr++] = URL;
1544     return (ctxt->nsNr);
1545 }
1546 /**
1547  * nsPop:
1548  * @ctxt: an XML parser context
1549  * @nr:  the number to pop
1550  *
1551  * Pops the top @nr parser prefix/namespace from the ns stack
1552  *
1553  * Returns the number of namespaces removed
1554  */
1555 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1556 nsPop(xmlParserCtxtPtr ctxt, int nr)
1557 {
1558     int i;
1559 
1560     if (ctxt->nsTab == NULL) return(0);
1561     if (ctxt->nsNr < nr) {
1562         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1563         nr = ctxt->nsNr;
1564     }
1565     if (ctxt->nsNr <= 0)
1566         return (0);
1567 
1568     for (i = 0;i < nr;i++) {
1569          ctxt->nsNr--;
1570 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1571     }
1572     return(nr);
1573 }
1574 #endif
1575 
1576 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1577 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578     const xmlChar **atts;
1579     int *attallocs;
1580     int maxatts;
1581 
1582     if (ctxt->atts == NULL) {
1583 	maxatts = 55; /* allow for 10 attrs by default */
1584 	atts = (const xmlChar **)
1585 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1586 	if (atts == NULL) goto mem_error;
1587 	ctxt->atts = atts;
1588 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589 	if (attallocs == NULL) goto mem_error;
1590 	ctxt->attallocs = attallocs;
1591 	ctxt->maxatts = maxatts;
1592     } else if (nr + 5 > ctxt->maxatts) {
1593 	maxatts = (nr + 5) * 2;
1594 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595 				     maxatts * sizeof(const xmlChar *));
1596 	if (atts == NULL) goto mem_error;
1597 	ctxt->atts = atts;
1598 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599 	                             (maxatts / 5) * sizeof(int));
1600 	if (attallocs == NULL) goto mem_error;
1601 	ctxt->attallocs = attallocs;
1602 	ctxt->maxatts = maxatts;
1603     }
1604     return(ctxt->maxatts);
1605 mem_error:
1606     xmlErrMemory(ctxt, NULL);
1607     return(-1);
1608 }
1609 
1610 /**
1611  * inputPush:
1612  * @ctxt:  an XML parser context
1613  * @value:  the parser input
1614  *
1615  * Pushes a new parser input on top of the input stack
1616  *
1617  * Returns -1 in case of error, the index in the stack otherwise
1618  */
1619 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1620 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1621 {
1622     if ((ctxt == NULL) || (value == NULL))
1623         return(-1);
1624     if (ctxt->inputNr >= ctxt->inputMax) {
1625         ctxt->inputMax *= 2;
1626         ctxt->inputTab =
1627             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1628                                              ctxt->inputMax *
1629                                              sizeof(ctxt->inputTab[0]));
1630         if (ctxt->inputTab == NULL) {
1631             xmlErrMemory(ctxt, NULL);
1632 	    xmlFreeInputStream(value);
1633 	    ctxt->inputMax /= 2;
1634 	    value = NULL;
1635             return (-1);
1636         }
1637     }
1638     ctxt->inputTab[ctxt->inputNr] = value;
1639     ctxt->input = value;
1640     return (ctxt->inputNr++);
1641 }
1642 /**
1643  * inputPop:
1644  * @ctxt: an XML parser context
1645  *
1646  * Pops the top parser input from the input stack
1647  *
1648  * Returns the input just removed
1649  */
1650 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1651 inputPop(xmlParserCtxtPtr ctxt)
1652 {
1653     xmlParserInputPtr ret;
1654 
1655     if (ctxt == NULL)
1656         return(NULL);
1657     if (ctxt->inputNr <= 0)
1658         return (NULL);
1659     ctxt->inputNr--;
1660     if (ctxt->inputNr > 0)
1661         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1662     else
1663         ctxt->input = NULL;
1664     ret = ctxt->inputTab[ctxt->inputNr];
1665     ctxt->inputTab[ctxt->inputNr] = NULL;
1666     return (ret);
1667 }
1668 /**
1669  * nodePush:
1670  * @ctxt:  an XML parser context
1671  * @value:  the element node
1672  *
1673  * Pushes a new element node on top of the node stack
1674  *
1675  * Returns -1 in case of error, the index in the stack otherwise
1676  */
1677 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1678 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1679 {
1680     if (ctxt == NULL) return(0);
1681     if (ctxt->nodeNr >= ctxt->nodeMax) {
1682         xmlNodePtr *tmp;
1683 
1684 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1685                                       ctxt->nodeMax * 2 *
1686                                       sizeof(ctxt->nodeTab[0]));
1687         if (tmp == NULL) {
1688             xmlErrMemory(ctxt, NULL);
1689             return (-1);
1690         }
1691         ctxt->nodeTab = tmp;
1692 	ctxt->nodeMax *= 2;
1693     }
1694     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1696 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1697 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1698 			  xmlParserMaxDepth);
1699 	ctxt->instate = XML_PARSER_EOF;
1700 	return(-1);
1701     }
1702     ctxt->nodeTab[ctxt->nodeNr] = value;
1703     ctxt->node = value;
1704     return (ctxt->nodeNr++);
1705 }
1706 
1707 /**
1708  * nodePop:
1709  * @ctxt: an XML parser context
1710  *
1711  * Pops the top element node from the node stack
1712  *
1713  * Returns the node just removed
1714  */
1715 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1716 nodePop(xmlParserCtxtPtr ctxt)
1717 {
1718     xmlNodePtr ret;
1719 
1720     if (ctxt == NULL) return(NULL);
1721     if (ctxt->nodeNr <= 0)
1722         return (NULL);
1723     ctxt->nodeNr--;
1724     if (ctxt->nodeNr > 0)
1725         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1726     else
1727         ctxt->node = NULL;
1728     ret = ctxt->nodeTab[ctxt->nodeNr];
1729     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1730     return (ret);
1731 }
1732 
1733 #ifdef LIBXML_PUSH_ENABLED
1734 /**
1735  * nameNsPush:
1736  * @ctxt:  an XML parser context
1737  * @value:  the element name
1738  * @prefix:  the element prefix
1739  * @URI:  the element namespace name
1740  *
1741  * Pushes a new element name/prefix/URL on top of the name stack
1742  *
1743  * Returns -1 in case of error, the index in the stack otherwise
1744  */
1745 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1746 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1748 {
1749     if (ctxt->nameNr >= ctxt->nameMax) {
1750         const xmlChar * *tmp;
1751         void **tmp2;
1752         ctxt->nameMax *= 2;
1753         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1754                                     ctxt->nameMax *
1755                                     sizeof(ctxt->nameTab[0]));
1756         if (tmp == NULL) {
1757 	    ctxt->nameMax /= 2;
1758 	    goto mem_error;
1759         }
1760 	ctxt->nameTab = tmp;
1761         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1762                                     ctxt->nameMax * 3 *
1763                                     sizeof(ctxt->pushTab[0]));
1764         if (tmp2 == NULL) {
1765 	    ctxt->nameMax /= 2;
1766 	    goto mem_error;
1767         }
1768 	ctxt->pushTab = tmp2;
1769     }
1770     ctxt->nameTab[ctxt->nameNr] = value;
1771     ctxt->name = value;
1772     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1774     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1775     return (ctxt->nameNr++);
1776 mem_error:
1777     xmlErrMemory(ctxt, NULL);
1778     return (-1);
1779 }
1780 /**
1781  * nameNsPop:
1782  * @ctxt: an XML parser context
1783  *
1784  * Pops the top element/prefix/URI name from the name stack
1785  *
1786  * Returns the name just removed
1787  */
1788 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1789 nameNsPop(xmlParserCtxtPtr ctxt)
1790 {
1791     const xmlChar *ret;
1792 
1793     if (ctxt->nameNr <= 0)
1794         return (NULL);
1795     ctxt->nameNr--;
1796     if (ctxt->nameNr > 0)
1797         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1798     else
1799         ctxt->name = NULL;
1800     ret = ctxt->nameTab[ctxt->nameNr];
1801     ctxt->nameTab[ctxt->nameNr] = NULL;
1802     return (ret);
1803 }
1804 #endif /* LIBXML_PUSH_ENABLED */
1805 
1806 /**
1807  * namePush:
1808  * @ctxt:  an XML parser context
1809  * @value:  the element name
1810  *
1811  * Pushes a new element name on top of the name stack
1812  *
1813  * Returns -1 in case of error, the index in the stack otherwise
1814  */
1815 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1816 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1817 {
1818     if (ctxt == NULL) return (-1);
1819 
1820     if (ctxt->nameNr >= ctxt->nameMax) {
1821         const xmlChar * *tmp;
1822         ctxt->nameMax *= 2;
1823         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1824                                     ctxt->nameMax *
1825                                     sizeof(ctxt->nameTab[0]));
1826         if (tmp == NULL) {
1827 	    ctxt->nameMax /= 2;
1828 	    goto mem_error;
1829         }
1830 	ctxt->nameTab = tmp;
1831     }
1832     ctxt->nameTab[ctxt->nameNr] = value;
1833     ctxt->name = value;
1834     return (ctxt->nameNr++);
1835 mem_error:
1836     xmlErrMemory(ctxt, NULL);
1837     return (-1);
1838 }
1839 /**
1840  * namePop:
1841  * @ctxt: an XML parser context
1842  *
1843  * Pops the top element name from the name stack
1844  *
1845  * Returns the name just removed
1846  */
1847 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1848 namePop(xmlParserCtxtPtr ctxt)
1849 {
1850     const xmlChar *ret;
1851 
1852     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1853         return (NULL);
1854     ctxt->nameNr--;
1855     if (ctxt->nameNr > 0)
1856         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1857     else
1858         ctxt->name = NULL;
1859     ret = ctxt->nameTab[ctxt->nameNr];
1860     ctxt->nameTab[ctxt->nameNr] = NULL;
1861     return (ret);
1862 }
1863 
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and makes ctxt->space point to
 * the new top entry. The table is doubled when full.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old table is still in place, restore its size */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1882 
/**
 * spacePop:
 * @ctxt: an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first slot of the table when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1895 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1930 
/* Current byte of the active input; RAW and CUR expand to the same thing. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* Byte located (val) positions after the current one. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Read pointer of the active input (usable as an lvalue). */
#define CUR_PTR (ctxt->input->cur)
1935 
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are
 * exactly c1 ... cn. Bytes are compared as unsigned char, left to right,
 * and the comparison stops at the first mismatch. Every argument is
 * parenthesized so that expressions such as "p + 1" can be passed safely;
 * note that s is still evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1953 
/*
 * SKIP(val): advance the current input by (val) bytes, adding the same
 * amount to the character counter and to the column. Afterwards a '%' at
 * the new position is handed to xmlParserHandlePEReference(), and if the
 * input is at a 0 byte and cannot be grown it is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (ctxt->input->cur[0] == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if (ctxt->input->cur[0] == 0) {					\
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
            xmlPopInput(ctxt);						\
    }									\
  } while (0)
1961 
/*
 * SKIPL(val): advance the current input by (val) bytes one at a time,
 * keeping line/column information up to date (a '\n' starts a new line
 * at column 1) and counting each byte in nbChars. Afterwards a '%' at the
 * new position is handed to xmlParserHandlePEReference(), and if the input
 * is at a 0 byte and cannot be grown it is popped.
 * (val) is parenthesized so that any expression can be passed; it is
 * re-evaluated on every iteration, so it should be free of side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)
1976 
1977 #define SHRINK if ((ctxt->progressive == 0) &&				\
1978 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1980 	xmlSHRINK (ctxt);
1981 
xmlSHRINK(xmlParserCtxtPtr ctxt)1982 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983     xmlParserInputShrink(ctxt->input);
1984     if ((*ctxt->input->cur == 0) &&
1985         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1986 	    xmlPopInput(ctxt);
1987   }
1988 
1989 #define GROW if ((ctxt->progressive == 0) &&				\
1990 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
1991 	xmlGROW (ctxt);
1992 
xmlGROW(xmlParserCtxtPtr ctxt)1993 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1995     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1996         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1997 	    xmlPopInput(ctxt);
1998 }
1999 
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, counting it in nbChars and in the
 * column, then try to grow the input if the new current byte is 0.
 * The expansion is a brace-enclosed block, not a do/while statement.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (ctxt->input->cur[0] == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2011 
/*
 * NEXTL(l): step over the current character, which occupies l bytes.
 * A '\n' starts a new line at column 1, anything else advances the column
 * by one. A '%' found at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (ctxt->input->cur[0] != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += (l);						\
    if (ctxt->input->cur[0] == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)
2019 
/* CUR_CHAR(l): current character of the context input, its byte length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same, but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i. When l is 1 a single byte is stored, otherwise the character is
 * written through xmlCopyCharMultiByte() and i grows by its return value.
 * All arguments are parenthesized so that expressions can be passed.
 * The expansion is a bare if/else statement: always use it as a full
 * statement inside braces.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2026 
2027 /**
2028  * xmlSkipBlankChars:
2029  * @ctxt:  the XML parser context
2030  *
2031  * skip all blanks character found at that point in the input streams.
2032  * It pops up finished entities in the process if allowable at that point.
2033  *
2034  * Returns the number of space chars skipped
2035  */
2036 
2037 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2038 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2039     int res = 0;
2040 
2041     /*
2042      * It's Okay to use CUR/NEXT here since all the blanks are on
2043      * the ASCII range.
2044      */
2045     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2046 	const xmlChar *cur;
2047 	/*
2048 	 * if we are in the document content, go really fast
2049 	 */
2050 	cur = ctxt->input->cur;
2051 	while (IS_BLANK_CH(*cur)) {
2052 	    if (*cur == '\n') {
2053 		ctxt->input->line++; ctxt->input->col = 1;
2054 	    }
2055 	    cur++;
2056 	    res++;
2057 	    if (*cur == 0) {
2058 		ctxt->input->cur = cur;
2059 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060 		cur = ctxt->input->cur;
2061 	    }
2062 	}
2063 	ctxt->input->cur = cur;
2064     } else {
2065 	int cur;
2066 	do {
2067 	    cur = CUR;
2068 	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2069 		NEXT;
2070 		cur = CUR;
2071 		res++;
2072 	    }
2073 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2074 		   (ctxt->instate != XML_PARSER_COMMENT)) {
2075 		xmlPopInput(ctxt);
2076 		cur = CUR;
2077 	    }
2078 	    /*
2079 	     * Need to handle support of entities branching here
2080 	     */
2081 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082 	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2083     }
2084     return(res);
2085 }
2086 
2087 /************************************************************************
2088  *									*
2089  *		Commodity functions to handle entities			*
2090  *									*
2091  ************************************************************************/
2092 
2093 /**
2094  * xmlPopInput:
2095  * @ctxt:  an XML parser context
2096  *
2097  * xmlPopInput: the current input pointed by ctxt->input came to an end
2098  *          pop it and return the next char.
2099  *
2100  * Returns the current xmlChar in the parser context
2101  */
2102 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2103 xmlPopInput(xmlParserCtxtPtr ctxt) {
2104     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2105     if (xmlParserDebugEntities)
2106 	xmlGenericError(xmlGenericErrorContext,
2107 		"Popping input %d\n", ctxt->inputNr);
2108     xmlFreeInputStream(inputPop(ctxt));
2109     if ((*ctxt->input->cur == 0) &&
2110         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111 	    return(xmlPopInput(ctxt));
2112     return(CUR);
2113 }
2114 
2115 /**
2116  * xmlPushInput:
2117  * @ctxt:  an XML parser context
2118  * @input:  an XML parser input fragment (entity, XML fragment ...).
2119  *
2120  * xmlPushInput: switch to a new input stream which is stacked on top
2121  *               of the previous one(s).
2122  * Returns -1 in case of error or the index in the input stack
2123  */
2124 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2125 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2126     int ret;
2127     if (input == NULL) return(-1);
2128 
2129     if (xmlParserDebugEntities) {
2130 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2131 	    xmlGenericError(xmlGenericErrorContext,
2132 		    "%s(%d): ", ctxt->input->filename,
2133 		    ctxt->input->line);
2134 	xmlGenericError(xmlGenericErrorContext,
2135 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2136     }
2137     ret = inputPush(ctxt, input);
2138     GROW;
2139     return(ret);
2140 }
2141 
2142 /**
2143  * xmlParseCharRef:
2144  * @ctxt:  an XML parser context
2145  *
2146  * parse Reference declarations
2147  *
2148  * [66] CharRef ::= '&#' [0-9]+ ';' |
2149  *                  '&#x' [0-9a-fA-F]+ ';'
2150  *
2151  * [ WFC: Legal Character ]
2152  * Characters referred to using character references must match the
2153  * production for Char.
2154  *
2155  * Returns the value parsed (as an int), 0 in case of error
2156  */
2157 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2158 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2159     unsigned int val = 0;
2160     int count = 0;
2161     unsigned int outofrange = 0;
2162 
2163     /*
2164      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2165      */
2166     if ((RAW == '&') && (NXT(1) == '#') &&
2167         (NXT(2) == 'x')) {
2168 	SKIP(3);
2169 	GROW;
2170 	while (RAW != ';') { /* loop blocked by count */
2171 	    if (count++ > 20) {
2172 		count = 0;
2173 		GROW;
2174 	    }
2175 	    if ((RAW >= '0') && (RAW <= '9'))
2176 	        val = val * 16 + (CUR - '0');
2177 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178 	        val = val * 16 + (CUR - 'a') + 10;
2179 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180 	        val = val * 16 + (CUR - 'A') + 10;
2181 	    else {
2182 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2183 		val = 0;
2184 		break;
2185 	    }
2186 	    if (val > 0x10FFFF)
2187 	        outofrange = val;
2188 
2189 	    NEXT;
2190 	    count++;
2191 	}
2192 	if (RAW == ';') {
2193 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2194 	    ctxt->input->col++;
2195 	    ctxt->nbChars ++;
2196 	    ctxt->input->cur++;
2197 	}
2198     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2199 	SKIP(2);
2200 	GROW;
2201 	while (RAW != ';') { /* loop blocked by count */
2202 	    if (count++ > 20) {
2203 		count = 0;
2204 		GROW;
2205 	    }
2206 	    if ((RAW >= '0') && (RAW <= '9'))
2207 	        val = val * 10 + (CUR - '0');
2208 	    else {
2209 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2210 		val = 0;
2211 		break;
2212 	    }
2213 	    if (val > 0x10FFFF)
2214 	        outofrange = val;
2215 
2216 	    NEXT;
2217 	    count++;
2218 	}
2219 	if (RAW == ';') {
2220 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2221 	    ctxt->input->col++;
2222 	    ctxt->nbChars ++;
2223 	    ctxt->input->cur++;
2224 	}
2225     } else {
2226         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2227     }
2228 
2229     /*
2230      * [ WFC: Legal Character ]
2231      * Characters referred to using character references must match the
2232      * production for Char.
2233      */
2234     if ((IS_CHAR(val) && (outofrange == 0))) {
2235         return(val);
2236     } else {
2237         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238                           "xmlParseCharRef: invalid xmlChar value %d\n",
2239 	                  val);
2240     }
2241     return(0);
2242 }
2243 
2244 /**
2245  * xmlParseStringCharRef:
2246  * @ctxt:  an XML parser context
2247  * @str:  a pointer to an index in the string
2248  *
2249  * parse Reference declarations, variant parsing from a string rather
2250  * than an an input flow.
2251  *
2252  * [66] CharRef ::= '&#' [0-9]+ ';' |
2253  *                  '&#x' [0-9a-fA-F]+ ';'
2254  *
2255  * [ WFC: Legal Character ]
2256  * Characters referred to using character references must match the
2257  * production for Char.
2258  *
2259  * Returns the value parsed (as an int), 0 in case of error, str will be
2260  *         updated to the current value of the index
2261  */
2262 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2263 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2264     const xmlChar *ptr;
2265     xmlChar cur;
2266     unsigned int val = 0;
2267     unsigned int outofrange = 0;
2268 
2269     if ((str == NULL) || (*str == NULL)) return(0);
2270     ptr = *str;
2271     cur = *ptr;
2272     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2273 	ptr += 3;
2274 	cur = *ptr;
2275 	while (cur != ';') { /* Non input consuming loop */
2276 	    if ((cur >= '0') && (cur <= '9'))
2277 	        val = val * 16 + (cur - '0');
2278 	    else if ((cur >= 'a') && (cur <= 'f'))
2279 	        val = val * 16 + (cur - 'a') + 10;
2280 	    else if ((cur >= 'A') && (cur <= 'F'))
2281 	        val = val * 16 + (cur - 'A') + 10;
2282 	    else {
2283 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284 		val = 0;
2285 		break;
2286 	    }
2287 	    if (val > 0x10FFFF)
2288 	        outofrange = val;
2289 
2290 	    ptr++;
2291 	    cur = *ptr;
2292 	}
2293 	if (cur == ';')
2294 	    ptr++;
2295     } else if  ((cur == '&') && (ptr[1] == '#')){
2296 	ptr += 2;
2297 	cur = *ptr;
2298 	while (cur != ';') { /* Non input consuming loops */
2299 	    if ((cur >= '0') && (cur <= '9'))
2300 	        val = val * 10 + (cur - '0');
2301 	    else {
2302 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2303 		val = 0;
2304 		break;
2305 	    }
2306 	    if (val > 0x10FFFF)
2307 	        outofrange = val;
2308 
2309 	    ptr++;
2310 	    cur = *ptr;
2311 	}
2312 	if (cur == ';')
2313 	    ptr++;
2314     } else {
2315 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2316 	return(0);
2317     }
2318     *str = ptr;
2319 
2320     /*
2321      * [ WFC: Legal Character ]
2322      * Characters referred to using character references must match the
2323      * production for Char.
2324      */
2325     if ((IS_CHAR(val) && (outofrange == 0))) {
2326         return(val);
2327     } else {
2328         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2330 			  val);
2331     }
2332     return(0);
2333 }
2334 
2335 /**
2336  * xmlNewBlanksWrapperInputStream:
2337  * @ctxt:  an XML parser context
2338  * @entity:  an Entity pointer
2339  *
2340  * Create a new input stream for wrapping
2341  * blanks around a PEReference
2342  *
2343  * Returns the new input stream or NULL
2344  */
2345 
deallocblankswrapper(xmlChar * str)2346 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2347 
2348 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2349 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350     xmlParserInputPtr input;
2351     xmlChar *buffer;
2352     size_t length;
2353     if (entity == NULL) {
2354 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355 	            "xmlNewBlanksWrapperInputStream entity\n");
2356 	return(NULL);
2357     }
2358     if (xmlParserDebugEntities)
2359 	xmlGenericError(xmlGenericErrorContext,
2360 		"new blanks wrapper for entity: %s\n", entity->name);
2361     input = xmlNewInputStream(ctxt);
2362     if (input == NULL) {
2363 	return(NULL);
2364     }
2365     length = xmlStrlen(entity->name) + 5;
2366     buffer = xmlMallocAtomic(length);
2367     if (buffer == NULL) {
2368 	xmlErrMemory(ctxt, NULL);
2369         xmlFree(input);
2370     	return(NULL);
2371     }
2372     buffer [0] = ' ';
2373     buffer [1] = '%';
2374     buffer [length-3] = ';';
2375     buffer [length-2] = ' ';
2376     buffer [length-1] = 0;
2377     memcpy(buffer + 2, entity->name, length - 5);
2378     input->free = deallocblankswrapper;
2379     input->base = buffer;
2380     input->cur = buffer;
2381     input->length = length;
2382     input->end = &buffer[length];
2383     return(input);
2384 }
2385 
2386 /**
2387  * xmlParserHandlePEReference:
2388  * @ctxt:  the parser context
2389  *
2390  * [69] PEReference ::= '%' Name ';'
2391  *
2392  * [ WFC: No Recursion ]
2393  * A parsed entity must not contain a recursive
2394  * reference to itself, either directly or indirectly.
2395  *
2396  * [ WFC: Entity Declared ]
2397  * In a document without any DTD, a document with only an internal DTD
2398  * subset which contains no parameter entity references, or a document
2399  * with "standalone='yes'", ...  ... The declaration of a parameter
2400  * entity must precede any reference to it...
2401  *
2402  * [ VC: Entity Declared ]
2403  * In a document with an external subset or external parameter entities
2404  * with "standalone='no'", ...  ... The declaration of a parameter entity
2405  * must precede any reference to it...
2406  *
2407  * [ WFC: In DTD ]
2408  * Parameter-entity references may only appear in the DTD.
2409  * NOTE: misleading but this is handled.
2410  *
2411  * A PEReference may have been detected in the current input stream
2412  * the handling is done accordingly to
2413  *      http://www.w3.org/TR/REC-xml#entproc
2414  * i.e.
2415  *   - Included in literal in entity values
2416  *   - Included as Parameter Entity reference within DTDs
2417  */
2418 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2419 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2420     const xmlChar *name;
2421     xmlEntityPtr entity = NULL;
2422     xmlParserInputPtr input;
2423 
2424     if (RAW != '%') return;
2425     switch(ctxt->instate) {
2426 	case XML_PARSER_CDATA_SECTION:
2427 	    return;
2428         case XML_PARSER_COMMENT:
2429 	    return;
2430 	case XML_PARSER_START_TAG:
2431 	    return;
2432 	case XML_PARSER_END_TAG:
2433 	    return;
2434         case XML_PARSER_EOF:
2435 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2436 	    return;
2437         case XML_PARSER_PROLOG:
2438 	case XML_PARSER_START:
2439 	case XML_PARSER_MISC:
2440 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2441 	    return;
2442 	case XML_PARSER_ENTITY_DECL:
2443         case XML_PARSER_CONTENT:
2444         case XML_PARSER_ATTRIBUTE_VALUE:
2445         case XML_PARSER_PI:
2446 	case XML_PARSER_SYSTEM_LITERAL:
2447 	case XML_PARSER_PUBLIC_LITERAL:
2448 	    /* we just ignore it there */
2449 	    return;
2450         case XML_PARSER_EPILOG:
2451 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2452 	    return;
2453 	case XML_PARSER_ENTITY_VALUE:
2454 	    /*
2455 	     * NOTE: in the case of entity values, we don't do the
2456 	     *       substitution here since we need the literal
2457 	     *       entity value to be able to save the internal
2458 	     *       subset of the document.
2459 	     *       This will be handled by xmlStringDecodeEntities
2460 	     */
2461 	    return;
2462         case XML_PARSER_DTD:
2463 	    /*
2464 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2465 	     * In the internal DTD subset, parameter-entity references
2466 	     * can occur only where markup declarations can occur, not
2467 	     * within markup declarations.
2468 	     * In that case this is handled in xmlParseMarkupDecl
2469 	     */
2470 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2471 		return;
2472 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2473 		return;
2474             break;
2475         case XML_PARSER_IGNORE:
2476             return;
2477     }
2478 
2479     NEXT;
2480     name = xmlParseName(ctxt);
2481     if (xmlParserDebugEntities)
2482 	xmlGenericError(xmlGenericErrorContext,
2483 		"PEReference: %s\n", name);
2484     if (name == NULL) {
2485 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2486     } else {
2487 	if (RAW == ';') {
2488 	    NEXT;
2489 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2490 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2491 	    if (entity == NULL) {
2492 
2493 		/*
2494 		 * [ WFC: Entity Declared ]
2495 		 * In a document without any DTD, a document with only an
2496 		 * internal DTD subset which contains no parameter entity
2497 		 * references, or a document with "standalone='yes'", ...
2498 		 * ... The declaration of a parameter entity must precede
2499 		 * any reference to it...
2500 		 */
2501 		if ((ctxt->standalone == 1) ||
2502 		    ((ctxt->hasExternalSubset == 0) &&
2503 		     (ctxt->hasPErefs == 0))) {
2504 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2505 			 "PEReference: %%%s; not found\n", name);
2506 	        } else {
2507 		    /*
2508 		     * [ VC: Entity Declared ]
2509 		     * In a document with an external subset or external
2510 		     * parameter entities with "standalone='no'", ...
2511 		     * ... The declaration of a parameter entity must precede
2512 		     * any reference to it...
2513 		     */
2514 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2515 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2516 			                 "PEReference: %%%s; not found\n",
2517 				         name, NULL);
2518 		    } else
2519 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2520 			              "PEReference: %%%s; not found\n",
2521 				      name, NULL);
2522 		    ctxt->valid = 0;
2523 		}
2524 	    } else if (ctxt->input->free != deallocblankswrapper) {
2525 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2526 		    if (xmlPushInput(ctxt, input) < 0)
2527 		        return;
2528 	    } else {
2529 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2530 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2531 		    xmlChar start[4];
2532 		    xmlCharEncoding enc;
2533 
2534 		    /*
2535 		     * handle the extra spaces added before and after
2536 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2537 		     * this is done independently.
2538 		     */
2539 		    input = xmlNewEntityInputStream(ctxt, entity);
2540 		    if (xmlPushInput(ctxt, input) < 0)
2541 		        return;
2542 
2543 		    /*
2544 		     * Get the 4 first bytes and decode the charset
2545 		     * if enc != XML_CHAR_ENCODING_NONE
2546 		     * plug some encoding conversion routines.
2547 		     * Note that, since we may have some non-UTF8
2548 		     * encoding (like UTF16, bug 135229), the 'length'
2549 		     * is not known, but we can calculate based upon
2550 		     * the amount of data in the buffer.
2551 		     */
2552 		    GROW
2553 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2554 			start[0] = RAW;
2555 			start[1] = NXT(1);
2556 			start[2] = NXT(2);
2557 			start[3] = NXT(3);
2558 			enc = xmlDetectCharEncoding(start, 4);
2559 			if (enc != XML_CHAR_ENCODING_NONE) {
2560 			    xmlSwitchEncoding(ctxt, enc);
2561 			}
2562 		    }
2563 
2564 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2565 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2566 			(IS_BLANK_CH(NXT(5)))) {
2567 			xmlParseTextDecl(ctxt);
2568 		    }
2569 		} else {
2570 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2571 			     "PEReference: %s is not a parameter entity\n",
2572 				      name);
2573 		}
2574 	    }
2575 	} else {
2576 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2577 	}
2578     }
2579 }
2580 
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer, n) enlarges 'buffer' to twice the size recorded in
 * the companion variable 'buffer_size' plus n bytes. The new size is
 * computed as a size_t and is only stored once the reallocation has
 * succeeded. On arithmetic wrap-around or allocation failure the macro
 * jumps to a 'mem_error' label which the calling function must provide;
 * buffer and buffer_size are left untouched in that case.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t growBuffer_size = (size_t) buffer##_size * 2 + (n);		\
    if (growBuffer_size < (size_t) buffer##_size) goto mem_error;	\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, growBuffer_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = growBuffer_size;					\
}
2593 
2594 /**
2595  * xmlStringLenDecodeEntities:
2596  * @ctxt:  the parser context
2597  * @str:  the input string
2598  * @len: the string length
2599  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600  * @end:  an end marker xmlChar, 0 if none
2601  * @end2:  an end marker xmlChar, 0 if none
2602  * @end3:  an end marker xmlChar, 0 if none
2603  *
2604  * Takes a entity string content and process to do the adequate substitutions.
2605  *
2606  * [67] Reference ::= EntityRef | CharRef
2607  *
2608  * [69] PEReference ::= '%' Name ';'
2609  *
2610  * Returns A newly allocated string with the substitution done. The caller
2611  *      must deallocate it !
2612  */
2613 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2614 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2616     xmlChar *buffer = NULL;
2617     int buffer_size = 0;
2618 
2619     xmlChar *current = NULL;
2620     xmlChar *rep = NULL;
2621     const xmlChar *last;
2622     xmlEntityPtr ent;
2623     int c,l;
2624     int nbchars = 0;
2625 
2626     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2627 	return(NULL);
2628     last = str + len;
2629 
2630     if (((ctxt->depth > 40) &&
2631          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632 	(ctxt->depth > 1024)) {
2633 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2634 	return(NULL);
2635     }
2636 
2637     /*
2638      * allocate a translation buffer.
2639      */
2640     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2641     buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2642     if (buffer == NULL) goto mem_error;
2643 
2644     /*
2645      * OK loop until we reach one of the ending char or a size limit.
2646      * we are operating on already parsed values.
2647      */
2648     if (str < last)
2649 	c = CUR_SCHAR(str, l);
2650     else
2651         c = 0;
2652     while ((c != 0) && (c != end) && /* non input consuming loop */
2653 	   (c != end2) && (c != end3)) {
2654 
2655 	if (c == 0) break;
2656         if ((c == '&') && (str[1] == '#')) {
2657 	    int val = xmlParseStringCharRef(ctxt, &str);
2658 	    if (val != 0) {
2659 		COPY_BUF(0,buffer,nbchars,val);
2660 	    }
2661 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2663 	    }
2664 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665 	    if (xmlParserDebugEntities)
2666 		xmlGenericError(xmlGenericErrorContext,
2667 			"String decoding Entity Reference: %.30s\n",
2668 			str);
2669 	    ent = xmlParseStringEntityRef(ctxt, &str);
2670 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2672 	        goto int_error;
2673 	    if (ent != NULL)
2674 	        ctxt->nbentities += ent->checked;
2675 	    if ((ent != NULL) &&
2676 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677 		if (ent->content != NULL) {
2678 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2679 		    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2680 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681 		    }
2682 		} else {
2683 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684 			    "predefined entity has no content\n");
2685 		}
2686 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2687 		ctxt->depth++;
2688 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2689 			                      0, 0, 0);
2690 		ctxt->depth--;
2691 
2692 		if (rep != NULL) {
2693 		    current = rep;
2694 		    while (*current != 0) { /* non input consuming loop */
2695 			buffer[nbchars++] = *current++;
2696 			if (nbchars >
2697 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2698 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2699 				goto int_error;
2700 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701 			}
2702 		    }
2703 		    xmlFree(rep);
2704 		    rep = NULL;
2705 		}
2706 	    } else if (ent != NULL) {
2707 		int i = xmlStrlen(ent->name);
2708 		const xmlChar *cur = ent->name;
2709 
2710 		buffer[nbchars++] = '&';
2711 		if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2712 		    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713 		}
2714 		for (;i > 0;i--)
2715 		    buffer[nbchars++] = *cur++;
2716 		buffer[nbchars++] = ';';
2717 	    }
2718 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719 	    if (xmlParserDebugEntities)
2720 		xmlGenericError(xmlGenericErrorContext,
2721 			"String decoding PE Reference: %.30s\n", str);
2722 	    ent = xmlParseStringPEReference(ctxt, &str);
2723 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2724 	        goto int_error;
2725 	    if (ent != NULL)
2726 	        ctxt->nbentities += ent->checked;
2727 	    if (ent != NULL) {
2728                 if (ent->content == NULL) {
2729 		    xmlLoadEntityContent(ctxt, ent);
2730 		}
2731 		ctxt->depth++;
2732 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733 			                      0, 0, 0);
2734 		ctxt->depth--;
2735 		if (rep != NULL) {
2736 		    current = rep;
2737 		    while (*current != 0) { /* non input consuming loop */
2738 			buffer[nbchars++] = *current++;
2739 			if (nbchars >
2740 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2741 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2742 			        goto int_error;
2743 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2744 			}
2745 		    }
2746 		    xmlFree(rep);
2747 		    rep = NULL;
2748 		}
2749 	    }
2750 	} else {
2751 	    COPY_BUF(l,buffer,nbchars,c);
2752 	    str += l;
2753 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2754 	      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2755 	    }
2756 	}
2757 	if (str < last)
2758 	    c = CUR_SCHAR(str, l);
2759 	else
2760 	    c = 0;
2761     }
2762     buffer[nbchars] = 0;
2763     return(buffer);
2764 
2765 mem_error:
2766     xmlErrMemory(ctxt, NULL);
2767 int_error:
2768     if (rep != NULL)
2769         xmlFree(rep);
2770     if (buffer != NULL)
2771         xmlFree(buffer);
2772     return(NULL);
2773 }
2774 
2775 /**
2776  * xmlStringDecodeEntities:
2777  * @ctxt:  the parser context
2778  * @str:  the input string
2779  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780  * @end:  an end marker xmlChar, 0 if none
2781  * @end2:  an end marker xmlChar, 0 if none
2782  * @end3:  an end marker xmlChar, 0 if none
2783  *
2784  * Takes a entity string content and process to do the adequate substitutions.
2785  *
2786  * [67] Reference ::= EntityRef | CharRef
2787  *
2788  * [69] PEReference ::= '%' Name ';'
2789  *
2790  * Returns A newly allocated string with the substitution done. The caller
2791  *      must deallocate it !
2792  */
2793 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2794 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2796     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2797     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2798            end, end2, end3));
2799 }
2800 
2801 /************************************************************************
2802  *									*
2803  *		Commodity functions, cleanup needed ?			*
2804  *									*
2805  ************************************************************************/
2806 
2807 /**
2808  * areBlanks:
2809  * @ctxt:  an XML parser context
2810  * @str:  a xmlChar *
2811  * @len:  the size of @str
2812  * @blank_chars: we know the chars are blanks
2813  *
2814  * Is this a sequence of blank chars that one can ignore ?
2815  *
2816  * Returns 1 if ignorable 0 otherwise.
2817  */
2818 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2819 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2820                      int blank_chars) {
2821     int i, ret;
2822     xmlNodePtr lastChild;
2823 
2824     /*
2825      * Don't spend time trying to differentiate them, the same callback is
2826      * used !
2827      */
2828     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2829 	return(0);
2830 
2831     /*
2832      * Check for xml:space value.
2833      */
2834     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835         (*(ctxt->space) == -2))
2836 	return(0);
2837 
2838     /*
2839      * Check that the string is made of blanks
2840      */
2841     if (blank_chars == 0) {
2842 	for (i = 0;i < len;i++)
2843 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2844     }
2845 
2846     /*
2847      * Look if the element is mixed content in the DTD if available
2848      */
2849     if (ctxt->node == NULL) return(0);
2850     if (ctxt->myDoc != NULL) {
2851 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852         if (ret == 0) return(1);
2853         if (ret == 1) return(0);
2854     }
2855 
2856     /*
2857      * Otherwise, heuristic :-\
2858      */
2859     if ((RAW != '<') && (RAW != 0xD)) return(0);
2860     if ((ctxt->node->children == NULL) &&
2861 	(RAW == '<') && (NXT(1) == '/')) return(0);
2862 
2863     lastChild = xmlGetLastChild(ctxt->node);
2864     if (lastChild == NULL) {
2865         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866             (ctxt->node->content != NULL)) return(0);
2867     } else if (xmlNodeIsText(lastChild))
2868         return(0);
2869     else if ((ctxt->node->children != NULL) &&
2870              (xmlNodeIsText(ctxt->node->children)))
2871         return(0);
2872     return(1);
2873 }
2874 
2875 /************************************************************************
2876  *									*
2877  *		Extra stuff for namespace support			*
2878  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2879  *									*
2880  ************************************************************************/
2881 
2882 /**
2883  * xmlSplitQName:
2884  * @ctxt:  an XML parser context
2885  * @name:  an XML parser context
2886  * @prefix:  a xmlChar **
2887  *
2888  * parse an UTF8 encoded XML qualified name string
2889  *
2890  * [NS 5] QName ::= (Prefix ':')? LocalPart
2891  *
2892  * [NS 6] Prefix ::= NCName
2893  *
2894  * [NS 7] LocalPart ::= NCName
2895  *
2896  * Returns the local part, and prefix is updated
2897  *   to get the Prefix if any.
2898  */
2899 
2900 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2901 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2902     xmlChar buf[XML_MAX_NAMELEN + 5];
2903     xmlChar *buffer = NULL;
2904     int len = 0;
2905     int max = XML_MAX_NAMELEN;
2906     xmlChar *ret = NULL;
2907     const xmlChar *cur = name;
2908     int c;
2909 
2910     if (prefix == NULL) return(NULL);
2911     *prefix = NULL;
2912 
2913     if (cur == NULL) return(NULL);
2914 
2915 #ifndef XML_XML_NAMESPACE
2916     /* xml: prefix is not really a namespace */
2917     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2918         (cur[2] == 'l') && (cur[3] == ':'))
2919 	return(xmlStrdup(name));
2920 #endif
2921 
2922     /* nasty but well=formed */
2923     if (cur[0] == ':')
2924 	return(xmlStrdup(name));
2925 
2926     c = *cur++;
2927     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2928 	buf[len++] = c;
2929 	c = *cur++;
2930     }
2931     if (len >= max) {
2932 	/*
2933 	 * Okay someone managed to make a huge name, so he's ready to pay
2934 	 * for the processing speed.
2935 	 */
2936 	max = len * 2;
2937 
2938 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2939 	if (buffer == NULL) {
2940 	    xmlErrMemory(ctxt, NULL);
2941 	    return(NULL);
2942 	}
2943 	memcpy(buffer, buf, len);
2944 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2945 	    if (len + 10 > max) {
2946 	        xmlChar *tmp;
2947 
2948 		max *= 2;
2949 		tmp = (xmlChar *) xmlRealloc(buffer,
2950 						max * sizeof(xmlChar));
2951 		if (tmp == NULL) {
2952 		    xmlFree(buffer);
2953 		    xmlErrMemory(ctxt, NULL);
2954 		    return(NULL);
2955 		}
2956 		buffer = tmp;
2957 	    }
2958 	    buffer[len++] = c;
2959 	    c = *cur++;
2960 	}
2961 	buffer[len] = 0;
2962     }
2963 
2964     if ((c == ':') && (*cur == 0)) {
2965         if (buffer != NULL)
2966 	    xmlFree(buffer);
2967 	*prefix = NULL;
2968 	return(xmlStrdup(name));
2969     }
2970 
2971     if (buffer == NULL)
2972 	ret = xmlStrndup(buf, len);
2973     else {
2974 	ret = buffer;
2975 	buffer = NULL;
2976 	max = XML_MAX_NAMELEN;
2977     }
2978 
2979 
2980     if (c == ':') {
2981 	c = *cur;
2982         *prefix = ret;
2983 	if (c == 0) {
2984 	    return(xmlStrndup(BAD_CAST "", 0));
2985 	}
2986 	len = 0;
2987 
2988 	/*
2989 	 * Check that the first character is proper to start
2990 	 * a new name
2991 	 */
2992 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
2993 	      ((c >= 0x41) && (c <= 0x5A)) ||
2994 	      (c == '_') || (c == ':'))) {
2995 	    int l;
2996 	    int first = CUR_SCHAR(cur, l);
2997 
2998 	    if (!IS_LETTER(first) && (first != '_')) {
2999 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3000 			    "Name %s is not XML Namespace compliant\n",
3001 				  name);
3002 	    }
3003 	}
3004 	cur++;
3005 
3006 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3007 	    buf[len++] = c;
3008 	    c = *cur++;
3009 	}
3010 	if (len >= max) {
3011 	    /*
3012 	     * Okay someone managed to make a huge name, so he's ready to pay
3013 	     * for the processing speed.
3014 	     */
3015 	    max = len * 2;
3016 
3017 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3018 	    if (buffer == NULL) {
3019 	        xmlErrMemory(ctxt, NULL);
3020 		return(NULL);
3021 	    }
3022 	    memcpy(buffer, buf, len);
3023 	    while (c != 0) { /* tested bigname2.xml */
3024 		if (len + 10 > max) {
3025 		    xmlChar *tmp;
3026 
3027 		    max *= 2;
3028 		    tmp = (xmlChar *) xmlRealloc(buffer,
3029 						    max * sizeof(xmlChar));
3030 		    if (tmp == NULL) {
3031 			xmlErrMemory(ctxt, NULL);
3032 			xmlFree(buffer);
3033 			return(NULL);
3034 		    }
3035 		    buffer = tmp;
3036 		}
3037 		buffer[len++] = c;
3038 		c = *cur++;
3039 	    }
3040 	    buffer[len] = 0;
3041 	}
3042 
3043 	if (buffer == NULL)
3044 	    ret = xmlStrndup(buf, len);
3045 	else {
3046 	    ret = buffer;
3047 	}
3048     }
3049 
3050     return(ret);
3051 }
3052 
3053 /************************************************************************
3054  *									*
3055  *			The parser itself				*
3056  *	Relates to http://www.w3.org/TR/REC-xml				*
3057  *									*
3058  ************************************************************************/
3059 
3060 /************************************************************************
3061  *									*
3062  *	Routines to parse Name, NCName and NmToken			*
3063  *									*
3064  ************************************************************************/
/*
 * Call counters, only compiled when DEBUG is defined. Each one is
 * incremented on entry of the parsing routine it is named after
 * (xmlParseName, xmlParseNmtoken, xmlParseNCName, xmlParseNCNameComplex,
 * xmlParseNameComplex and xmlParseStringName respectively).
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3073 
3074 /*
3075  * The two following functions are related to the change of accepted
3076  * characters for Name and NmToken in the Revision 5 of XML-1.0
3077  * They correspond to the modified production [4] and the new production [4a]
3078  * changes in that revision. Also note that the macros used for the
3079  * productions Letter, Digit, CombiningChar and Extender are not needed
3080  * anymore.
3081  * We still keep compatibility to pre-revision5 parsing semantic if the
3082  * new XML_PARSE_OLD10 option is given to the parser.
3083  */
3084 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3085 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087         /*
3088 	 * Use the new checks of production [4] [4a] amd [5] of the
3089 	 * Update 5 of XML-1.0
3090 	 */
3091 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092 	    (((c >= 'a') && (c <= 'z')) ||
3093 	     ((c >= 'A') && (c <= 'Z')) ||
3094 	     (c == '_') || (c == ':') ||
3095 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3096 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3097 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3098 	     ((c >= 0x370) && (c <= 0x37D)) ||
3099 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3101 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3102 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3107 	    return(1);
3108     } else {
3109         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3110 	    return(1);
3111     }
3112     return(0);
3113 }
3114 
3115 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3116 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118         /*
3119 	 * Use the new checks of production [4] [4a] amd [5] of the
3120 	 * Update 5 of XML-1.0
3121 	 */
3122 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123 	    (((c >= 'a') && (c <= 'z')) ||
3124 	     ((c >= 'A') && (c <= 'Z')) ||
3125 	     ((c >= '0') && (c <= '9')) || /* !start */
3126 	     (c == '_') || (c == ':') ||
3127 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3129 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3130 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3131 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132 	     ((c >= 0x370) && (c <= 0x37D)) ||
3133 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3135 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3137 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3142 	     return(1);
3143     } else {
3144         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145             (c == '.') || (c == '-') ||
3146 	    (c == '_') || (c == ':') ||
3147 	    (IS_COMBINING(c)) ||
3148 	    (IS_EXTENDER(c)))
3149 	    return(1);
3150     }
3151     return(0);
3152 }
3153 
/*
 * Forward declaration of a static helper whose definition is not part
 * of this section of the file (presumably so that routines placed ahead
 * of the definition can call it).
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3156 
3157 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3158 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3159     int len = 0, l;
3160     int c;
3161     int count = 0;
3162 
3163 #ifdef DEBUG
3164     nbParseNameComplex++;
3165 #endif
3166 
3167     /*
3168      * Handler for more complex cases
3169      */
3170     GROW;
3171     c = CUR_CHAR(l);
3172     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173         /*
3174 	 * Use the new checks of production [4] [4a] amd [5] of the
3175 	 * Update 5 of XML-1.0
3176 	 */
3177 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3178 	    (!(((c >= 'a') && (c <= 'z')) ||
3179 	       ((c >= 'A') && (c <= 'Z')) ||
3180 	       (c == '_') || (c == ':') ||
3181 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3182 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3183 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3184 	       ((c >= 0x370) && (c <= 0x37D)) ||
3185 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3186 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3187 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3188 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3193 	    return(NULL);
3194 	}
3195 	len += l;
3196 	NEXTL(l);
3197 	c = CUR_CHAR(l);
3198 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3199 	       (((c >= 'a') && (c <= 'z')) ||
3200 	        ((c >= 'A') && (c <= 'Z')) ||
3201 	        ((c >= '0') && (c <= '9')) || /* !start */
3202 	        (c == '_') || (c == ':') ||
3203 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3204 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3205 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3206 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3207 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3208 	        ((c >= 0x370) && (c <= 0x37D)) ||
3209 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3210 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3211 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3212 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3213 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3214 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3215 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3216 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3217 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3218 		)) {
3219 	    if (count++ > 100) {
3220 		count = 0;
3221 		GROW;
3222 	    }
3223 	    len += l;
3224 	    NEXTL(l);
3225 	    c = CUR_CHAR(l);
3226 	}
3227     } else {
3228 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3229 	    (!IS_LETTER(c) && (c != '_') &&
3230 	     (c != ':'))) {
3231 	    return(NULL);
3232 	}
3233 	len += l;
3234 	NEXTL(l);
3235 	c = CUR_CHAR(l);
3236 
3237 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3238 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3239 		(c == '.') || (c == '-') ||
3240 		(c == '_') || (c == ':') ||
3241 		(IS_COMBINING(c)) ||
3242 		(IS_EXTENDER(c)))) {
3243 	    if (count++ > 100) {
3244 		count = 0;
3245 		GROW;
3246 	    }
3247 	    len += l;
3248 	    NEXTL(l);
3249 	    c = CUR_CHAR(l);
3250 	}
3251     }
3252     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3253         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3254     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3255 }
3256 
3257 /**
3258  * xmlParseName:
3259  * @ctxt:  an XML parser context
3260  *
3261  * parse an XML name.
3262  *
3263  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264  *                  CombiningChar | Extender
3265  *
3266  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3267  *
3268  * [6] Names ::= Name (#x20 Name)*
3269  *
3270  * Returns the Name parsed or NULL
3271  */
3272 
3273 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3274 xmlParseName(xmlParserCtxtPtr ctxt) {
3275     const xmlChar *in;
3276     const xmlChar *ret;
3277     int count = 0;
3278 
3279     GROW;
3280 
3281 #ifdef DEBUG
3282     nbParseName++;
3283 #endif
3284 
3285     /*
3286      * Accelerator for simple ASCII names
3287      */
3288     in = ctxt->input->cur;
3289     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290 	((*in >= 0x41) && (*in <= 0x5A)) ||
3291 	(*in == '_') || (*in == ':')) {
3292 	in++;
3293 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3295 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3296 	       (*in == '_') || (*in == '-') ||
3297 	       (*in == ':') || (*in == '.'))
3298 	    in++;
3299 	if ((*in > 0) && (*in < 0x80)) {
3300 	    count = in - ctxt->input->cur;
3301 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3302 	    ctxt->input->cur = in;
3303 	    ctxt->nbChars += count;
3304 	    ctxt->input->col += count;
3305 	    if (ret == NULL)
3306 	        xmlErrMemory(ctxt, NULL);
3307 	    return(ret);
3308 	}
3309     }
3310     /* accelerator for special cases */
3311     return(xmlParseNameComplex(ctxt));
3312 }
3313 
3314 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3315 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3316     int len = 0, l;
3317     int c;
3318     int count = 0;
3319 
3320 #ifdef DEBUG
3321     nbParseNCNameComplex++;
3322 #endif
3323 
3324     /*
3325      * Handler for more complex cases
3326      */
3327     GROW;
3328     c = CUR_CHAR(l);
3329     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3331 	return(NULL);
3332     }
3333 
3334     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336 	if (count++ > 100) {
3337 	    count = 0;
3338 	    GROW;
3339 	}
3340 	len += l;
3341 	NEXTL(l);
3342 	c = CUR_CHAR(l);
3343     }
3344     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3345 }
3346 
3347 /**
3348  * xmlParseNCName:
3349  * @ctxt:  an XML parser context
3350  * @len:  lenght of the string parsed
3351  *
3352  * parse an XML name.
3353  *
3354  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355  *                      CombiningChar | Extender
3356  *
3357  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3358  *
3359  * Returns the Name parsed or NULL
3360  */
3361 
3362 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3363 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3364     const xmlChar *in;
3365     const xmlChar *ret;
3366     int count = 0;
3367 
3368 #ifdef DEBUG
3369     nbParseNCName++;
3370 #endif
3371 
3372     /*
3373      * Accelerator for simple ASCII names
3374      */
3375     in = ctxt->input->cur;
3376     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 	((*in >= 0x41) && (*in <= 0x5A)) ||
3378 	(*in == '_')) {
3379 	in++;
3380 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3382 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3383 	       (*in == '_') || (*in == '-') ||
3384 	       (*in == '.'))
3385 	    in++;
3386 	if ((*in > 0) && (*in < 0x80)) {
3387 	    count = in - ctxt->input->cur;
3388 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 	    ctxt->input->cur = in;
3390 	    ctxt->nbChars += count;
3391 	    ctxt->input->col += count;
3392 	    if (ret == NULL) {
3393 	        xmlErrMemory(ctxt, NULL);
3394 	    }
3395 	    return(ret);
3396 	}
3397     }
3398     return(xmlParseNCNameComplex(ctxt));
3399 }
3400 
3401 /**
3402  * xmlParseNameAndCompare:
3403  * @ctxt:  an XML parser context
3404  *
3405  * parse an XML name and compares for match
3406  * (specialized for endtag parsing)
3407  *
3408  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409  * and the name for mismatch
3410  */
3411 
3412 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3413 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3414     register const xmlChar *cmp = other;
3415     register const xmlChar *in;
3416     const xmlChar *ret;
3417 
3418     GROW;
3419 
3420     in = ctxt->input->cur;
3421     while (*in != 0 && *in == *cmp) {
3422 	++in;
3423 	++cmp;
3424 	ctxt->input->col++;
3425     }
3426     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3427 	/* success */
3428 	ctxt->input->cur = in;
3429 	return (const xmlChar*) 1;
3430     }
3431     /* failure (or end of input buffer), check with full function */
3432     ret = xmlParseName (ctxt);
3433     /* strings coming from the dictionnary direct compare possible */
3434     if (ret == other) {
3435 	return (const xmlChar*) 1;
3436     }
3437     return ret;
3438 }
3439 
3440 /**
3441  * xmlParseStringName:
3442  * @ctxt:  an XML parser context
3443  * @str:  a pointer to the string pointer (IN/OUT)
3444  *
3445  * parse an XML name.
3446  *
3447  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448  *                  CombiningChar | Extender
3449  *
3450  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3451  *
3452  * [6] Names ::= Name (#x20 Name)*
3453  *
3454  * Returns the Name parsed or NULL. The @str pointer
3455  * is updated to the current location in the string.
3456  */
3457 
3458 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3459 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460     xmlChar buf[XML_MAX_NAMELEN + 5];
3461     const xmlChar *cur = *str;
3462     int len = 0, l;
3463     int c;
3464 
3465 #ifdef DEBUG
3466     nbParseStringName++;
3467 #endif
3468 
3469     c = CUR_SCHAR(cur, l);
3470     if (!xmlIsNameStartChar(ctxt, c)) {
3471 	return(NULL);
3472     }
3473 
3474     COPY_BUF(l,buf,len,c);
3475     cur += l;
3476     c = CUR_SCHAR(cur, l);
3477     while (xmlIsNameChar(ctxt, c)) {
3478 	COPY_BUF(l,buf,len,c);
3479 	cur += l;
3480 	c = CUR_SCHAR(cur, l);
3481 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3482 	    /*
3483 	     * Okay someone managed to make a huge name, so he's ready to pay
3484 	     * for the processing speed.
3485 	     */
3486 	    xmlChar *buffer;
3487 	    int max = len * 2;
3488 
3489 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3490 	    if (buffer == NULL) {
3491 	        xmlErrMemory(ctxt, NULL);
3492 		return(NULL);
3493 	    }
3494 	    memcpy(buffer, buf, len);
3495 	    while (xmlIsNameChar(ctxt, c)) {
3496 		if (len + 10 > max) {
3497 		    xmlChar *tmp;
3498 		    max *= 2;
3499 		    tmp = (xmlChar *) xmlRealloc(buffer,
3500 			                            max * sizeof(xmlChar));
3501 		    if (tmp == NULL) {
3502 			xmlErrMemory(ctxt, NULL);
3503 			xmlFree(buffer);
3504 			return(NULL);
3505 		    }
3506 		    buffer = tmp;
3507 		}
3508 		COPY_BUF(l,buffer,len,c);
3509 		cur += l;
3510 		c = CUR_SCHAR(cur, l);
3511 	    }
3512 	    buffer[len] = 0;
3513 	    *str = cur;
3514 	    return(buffer);
3515 	}
3516     }
3517     *str = cur;
3518     return(xmlStrndup(buf, len));
3519 }
3520 
3521 /**
3522  * xmlParseNmtoken:
3523  * @ctxt:  an XML parser context
3524  *
3525  * parse an XML Nmtoken.
3526  *
3527  * [7] Nmtoken ::= (NameChar)+
3528  *
3529  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3530  *
3531  * Returns the Nmtoken parsed or NULL
3532  */
3533 
3534 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3535 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536     xmlChar buf[XML_MAX_NAMELEN + 5];
3537     int len = 0, l;
3538     int c;
3539     int count = 0;
3540 
3541 #ifdef DEBUG
3542     nbParseNmToken++;
3543 #endif
3544 
3545     GROW;
3546     c = CUR_CHAR(l);
3547 
3548     while (xmlIsNameChar(ctxt, c)) {
3549 	if (count++ > 100) {
3550 	    count = 0;
3551 	    GROW;
3552 	}
3553 	COPY_BUF(l,buf,len,c);
3554 	NEXTL(l);
3555 	c = CUR_CHAR(l);
3556 	if (len >= XML_MAX_NAMELEN) {
3557 	    /*
3558 	     * Okay someone managed to make a huge token, so he's ready to pay
3559 	     * for the processing speed.
3560 	     */
3561 	    xmlChar *buffer;
3562 	    int max = len * 2;
3563 
3564 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3565 	    if (buffer == NULL) {
3566 	        xmlErrMemory(ctxt, NULL);
3567 		return(NULL);
3568 	    }
3569 	    memcpy(buffer, buf, len);
3570 	    while (xmlIsNameChar(ctxt, c)) {
3571 		if (count++ > 100) {
3572 		    count = 0;
3573 		    GROW;
3574 		}
3575 		if (len + 10 > max) {
3576 		    xmlChar *tmp;
3577 
3578 		    max *= 2;
3579 		    tmp = (xmlChar *) xmlRealloc(buffer,
3580 			                            max * sizeof(xmlChar));
3581 		    if (tmp == NULL) {
3582 			xmlErrMemory(ctxt, NULL);
3583 			xmlFree(buffer);
3584 			return(NULL);
3585 		    }
3586 		    buffer = tmp;
3587 		}
3588 		COPY_BUF(l,buffer,len,c);
3589 		NEXTL(l);
3590 		c = CUR_CHAR(l);
3591 	    }
3592 	    buffer[len] = 0;
3593 	    return(buffer);
3594 	}
3595     }
3596     if (len == 0)
3597         return(NULL);
3598     return(xmlStrndup(buf, len));
3599 }
3600 
3601 /**
3602  * xmlParseEntityValue:
3603  * @ctxt:  an XML parser context
3604  * @orig:  if non-NULL store a copy of the original entity value
3605  *
3606  * parse a value for ENTITY declarations
3607  *
3608  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3609  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3610  *
3611  * Returns the EntityValue parsed with reference substituted or NULL
3612  */
3613 
3614 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3615 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3616     xmlChar *buf = NULL;
3617     int len = 0;
3618     int size = XML_PARSER_BUFFER_SIZE;
3619     int c, l;
3620     xmlChar stop;
3621     xmlChar *ret = NULL;
3622     const xmlChar *cur = NULL;
3623     xmlParserInputPtr input;
3624 
3625     if (RAW == '"') stop = '"';
3626     else if (RAW == '\'') stop = '\'';
3627     else {
3628 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3629 	return(NULL);
3630     }
3631     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3632     if (buf == NULL) {
3633 	xmlErrMemory(ctxt, NULL);
3634 	return(NULL);
3635     }
3636 
3637     /*
3638      * The content of the entity definition is copied in a buffer.
3639      */
3640 
3641     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3642     input = ctxt->input;
3643     GROW;
3644     NEXT;
3645     c = CUR_CHAR(l);
3646     /*
3647      * NOTE: 4.4.5 Included in Literal
3648      * When a parameter entity reference appears in a literal entity
3649      * value, ... a single or double quote character in the replacement
3650      * text is always treated as a normal data character and will not
3651      * terminate the literal.
3652      * In practice it means we stop the loop only when back at parsing
3653      * the initial entity and the quote is found
3654      */
3655     while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3656 	   (ctxt->input != input))) {
3657 	if (len + 5 >= size) {
3658 	    xmlChar *tmp;
3659 
3660 	    size *= 2;
3661 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3662 	    if (tmp == NULL) {
3663 		xmlErrMemory(ctxt, NULL);
3664 		xmlFree(buf);
3665 		return(NULL);
3666 	    }
3667 	    buf = tmp;
3668 	}
3669 	COPY_BUF(l,buf,len,c);
3670 	NEXTL(l);
3671 	/*
3672 	 * Pop-up of finished entities.
3673 	 */
3674 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3675 	    xmlPopInput(ctxt);
3676 
3677 	GROW;
3678 	c = CUR_CHAR(l);
3679 	if (c == 0) {
3680 	    GROW;
3681 	    c = CUR_CHAR(l);
3682 	}
3683     }
3684     buf[len] = 0;
3685 
3686     /*
3687      * Raise problem w.r.t. '&' and '%' being used in non-entities
3688      * reference constructs. Note Charref will be handled in
3689      * xmlStringDecodeEntities()
3690      */
3691     cur = buf;
3692     while (*cur != 0) { /* non input consuming */
3693 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3694 	    xmlChar *name;
3695 	    xmlChar tmp = *cur;
3696 
3697 	    cur++;
3698 	    name = xmlParseStringName(ctxt, &cur);
3699             if ((name == NULL) || (*cur != ';')) {
3700 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3701 	    "EntityValue: '%c' forbidden except for entities references\n",
3702 	                          tmp);
3703 	    }
3704 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3705 		(ctxt->inputNr == 1)) {
3706 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3707 	    }
3708 	    if (name != NULL)
3709 		xmlFree(name);
3710 	    if (*cur == 0)
3711 	        break;
3712 	}
3713 	cur++;
3714     }
3715 
3716     /*
3717      * Then PEReference entities are substituted.
3718      */
3719     if (c != stop) {
3720 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3721 	xmlFree(buf);
3722     } else {
3723 	NEXT;
3724 	/*
3725 	 * NOTE: 4.4.7 Bypassed
3726 	 * When a general entity reference appears in the EntityValue in
3727 	 * an entity declaration, it is bypassed and left as is.
3728 	 * so XML_SUBSTITUTE_REF is not set here.
3729 	 */
3730 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3731 				      0, 0, 0);
3732 	if (orig != NULL)
3733 	    *orig = buf;
3734 	else
3735 	    xmlFree(buf);
3736     }
3737 
3738     return(ret);
3739 }
3740 
3741 /**
3742  * xmlParseAttValueComplex:
3743  * @ctxt:  an XML parser context
3744  * @len:   the resulting attribute len
3745  * @normalize:  wether to apply the inner normalization
3746  *
3747  * parse a value for an attribute, this is the fallback function
3748  * of xmlParseAttValue() when the attribute parsing requires handling
3749  * of non-ASCII characters, or normalization compaction.
3750  *
3751  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3752  */
3753 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3754 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3755     xmlChar limit = 0;
3756     xmlChar *buf = NULL;
3757     xmlChar *rep = NULL;
3758     int len = 0;
3759     int buf_size = 0;
3760     int c, l, in_space = 0;
3761     xmlChar *current = NULL;
3762     xmlEntityPtr ent;
3763 
3764     if (NXT(0) == '"') {
3765 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3766 	limit = '"';
3767         NEXT;
3768     } else if (NXT(0) == '\'') {
3769 	limit = '\'';
3770 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3771         NEXT;
3772     } else {
3773 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3774 	return(NULL);
3775     }
3776 
3777     /*
3778      * allocate a translation buffer.
3779      */
3780     buf_size = XML_PARSER_BUFFER_SIZE;
3781     buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3782     if (buf == NULL) goto mem_error;
3783 
3784     /*
3785      * OK loop until we reach one of the ending char or a size limit.
3786      */
3787     c = CUR_CHAR(l);
3788     while ((NXT(0) != limit) && /* checked */
3789            (IS_CHAR(c)) && (c != '<')) {
3790 	if (c == 0) break;
3791 	if (c == '&') {
3792 	    in_space = 0;
3793 	    if (NXT(1) == '#') {
3794 		int val = xmlParseCharRef(ctxt);
3795 
3796 		if (val == '&') {
3797 		    if (ctxt->replaceEntities) {
3798 			if (len > buf_size - 10) {
3799 			    growBuffer(buf, 10);
3800 			}
3801 			buf[len++] = '&';
3802 		    } else {
3803 			/*
3804 			 * The reparsing will be done in xmlStringGetNodeList()
3805 			 * called by the attribute() function in SAX.c
3806 			 */
3807 			if (len > buf_size - 10) {
3808 			    growBuffer(buf, 10);
3809 			}
3810 			buf[len++] = '&';
3811 			buf[len++] = '#';
3812 			buf[len++] = '3';
3813 			buf[len++] = '8';
3814 			buf[len++] = ';';
3815 		    }
3816 		} else if (val != 0) {
3817 		    if (len > buf_size - 10) {
3818 			growBuffer(buf, 10);
3819 		    }
3820 		    len += xmlCopyChar(0, &buf[len], val);
3821 		}
3822 	    } else {
3823 		ent = xmlParseEntityRef(ctxt);
3824 		ctxt->nbentities++;
3825 		if (ent != NULL)
3826 		    ctxt->nbentities += ent->owner;
3827 		if ((ent != NULL) &&
3828 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829 		    if (len > buf_size - 10) {
3830 			growBuffer(buf, 10);
3831 		    }
3832 		    if ((ctxt->replaceEntities == 0) &&
3833 		        (ent->content[0] == '&')) {
3834 			buf[len++] = '&';
3835 			buf[len++] = '#';
3836 			buf[len++] = '3';
3837 			buf[len++] = '8';
3838 			buf[len++] = ';';
3839 		    } else {
3840 			buf[len++] = ent->content[0];
3841 		    }
3842 		} else if ((ent != NULL) &&
3843 		           (ctxt->replaceEntities != 0)) {
3844 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3846 						      XML_SUBSTITUTE_REF,
3847 						      0, 0, 0);
3848 			if (rep != NULL) {
3849 			    current = rep;
3850 			    while (*current != 0) { /* non input consuming */
3851                                 if ((*current == 0xD) || (*current == 0xA) ||
3852                                     (*current == 0x9)) {
3853                                     buf[len++] = 0x20;
3854                                     current++;
3855                                 } else
3856                                     buf[len++] = *current++;
3857 				if (len > buf_size - 10) {
3858 				    growBuffer(buf, 10);
3859 				}
3860 			    }
3861 			    xmlFree(rep);
3862 			    rep = NULL;
3863 			}
3864 		    } else {
3865 			if (len > buf_size - 10) {
3866 			    growBuffer(buf, 10);
3867 			}
3868 			if (ent->content != NULL)
3869 			    buf[len++] = ent->content[0];
3870 		    }
3871 		} else if (ent != NULL) {
3872 		    int i = xmlStrlen(ent->name);
3873 		    const xmlChar *cur = ent->name;
3874 
3875 		    /*
3876 		     * This may look absurd but is needed to detect
3877 		     * entities problems
3878 		     */
3879 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880 			(ent->content != NULL)) {
3881 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3882 						  XML_SUBSTITUTE_REF, 0, 0, 0);
3883 			if (rep != NULL) {
3884 			    xmlFree(rep);
3885 			    rep = NULL;
3886 			}
3887 		    }
3888 
3889 		    /*
3890 		     * Just output the reference
3891 		     */
3892 		    buf[len++] = '&';
3893 		    while (len > buf_size - i - 10) {
3894 			growBuffer(buf, i + 10);
3895 		    }
3896 		    for (;i > 0;i--)
3897 			buf[len++] = *cur++;
3898 		    buf[len++] = ';';
3899 		}
3900 	    }
3901 	} else {
3902 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903 	        if ((len != 0) || (!normalize)) {
3904 		    if ((!normalize) || (!in_space)) {
3905 			COPY_BUF(l,buf,len,0x20);
3906 			while (len > buf_size - 10) {
3907 			    growBuffer(buf, 10);
3908 			}
3909 		    }
3910 		    in_space = 1;
3911 		}
3912 	    } else {
3913 	        in_space = 0;
3914 		COPY_BUF(l,buf,len,c);
3915 		if (len > buf_size - 10) {
3916 		    growBuffer(buf, 10);
3917 		}
3918 	    }
3919 	    NEXTL(l);
3920 	}
3921 	GROW;
3922 	c = CUR_CHAR(l);
3923     }
3924     if ((in_space) && (normalize)) {
3925         while (buf[len - 1] == 0x20) len--;
3926     }
3927     buf[len] = 0;
3928     if (RAW == '<') {
3929 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3930     } else if (RAW != limit) {
3931 	if ((c != 0) && (!IS_CHAR(c))) {
3932 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933 			   "invalid character in attribute value\n");
3934 	} else {
3935 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936 			   "AttValue: ' expected\n");
3937         }
3938     } else
3939 	NEXT;
3940     if (attlen != NULL) *attlen = len;
3941     return(buf);
3942 
3943 mem_error:
3944     xmlErrMemory(ctxt, NULL);
3945     if (buf != NULL)
3946         xmlFree(buf);
3947     if (rep != NULL)
3948         xmlFree(rep);
3949     return(NULL);
3950 }
3951 
3952 /**
3953  * xmlParseAttValue:
3954  * @ctxt:  an XML parser context
3955  *
3956  * parse a value for an attribute
3957  * Note: the parser won't do substitution of entities here, this
3958  * will be handled later in xmlStringGetNodeList
3959  *
3960  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961  *                   "'" ([^<&'] | Reference)* "'"
3962  *
3963  * 3.3.3 Attribute-Value Normalization:
3964  * Before the value of an attribute is passed to the application or
3965  * checked for validity, the XML processor must normalize it as follows:
3966  * - a character reference is processed by appending the referenced
3967  *   character to the attribute value
3968  * - an entity reference is processed by recursively processing the
3969  *   replacement text of the entity
3970  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971  *   appending #x20 to the normalized value, except that only a single
3972  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3973  *   parsed entity or the literal entity value of an internal parsed entity
3974  * - other characters are processed by appending them to the normalized value
3975  * If the declared value is not CDATA, then the XML processor must further
3976  * process the normalized attribute value by discarding any leading and
3977  * trailing space (#x20) characters, and by replacing sequences of space
3978  * (#x20) characters by a single space (#x20) character.
3979  * All attributes for which no declaration has been read should be treated
3980  * by a non-validating parser as if declared CDATA.
3981  *
3982  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3983  */
3984 
3985 
3986 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3987 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3988     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3989     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3990 }
3991 
3992 /**
3993  * xmlParseSystemLiteral:
3994  * @ctxt:  an XML parser context
3995  *
3996  * parse an XML Literal
3997  *
3998  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3999  *
4000  * Returns the SystemLiteral parsed or NULL
4001  */
4002 
4003 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4004 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005     xmlChar *buf = NULL;
4006     int len = 0;
4007     int size = XML_PARSER_BUFFER_SIZE;
4008     int cur, l;
4009     xmlChar stop;
4010     int state = ctxt->instate;
4011     int count = 0;
4012 
4013     SHRINK;
4014     if (RAW == '"') {
4015         NEXT;
4016 	stop = '"';
4017     } else if (RAW == '\'') {
4018         NEXT;
4019 	stop = '\'';
4020     } else {
4021 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4022 	return(NULL);
4023     }
4024 
4025     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4026     if (buf == NULL) {
4027         xmlErrMemory(ctxt, NULL);
4028 	return(NULL);
4029     }
4030     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4031     cur = CUR_CHAR(l);
4032     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4033 	if (len + 5 >= size) {
4034 	    xmlChar *tmp;
4035 
4036 	    size *= 2;
4037 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4038 	    if (tmp == NULL) {
4039 	        xmlFree(buf);
4040 		xmlErrMemory(ctxt, NULL);
4041 		ctxt->instate = (xmlParserInputState) state;
4042 		return(NULL);
4043 	    }
4044 	    buf = tmp;
4045 	}
4046 	count++;
4047 	if (count > 50) {
4048 	    GROW;
4049 	    count = 0;
4050 	}
4051 	COPY_BUF(l,buf,len,cur);
4052 	NEXTL(l);
4053 	cur = CUR_CHAR(l);
4054 	if (cur == 0) {
4055 	    GROW;
4056 	    SHRINK;
4057 	    cur = CUR_CHAR(l);
4058 	}
4059     }
4060     buf[len] = 0;
4061     ctxt->instate = (xmlParserInputState) state;
4062     if (!IS_CHAR(cur)) {
4063 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064     } else {
4065 	NEXT;
4066     }
4067     return(buf);
4068 }
4069 
4070 /**
4071  * xmlParsePubidLiteral:
4072  * @ctxt:  an XML parser context
4073  *
4074  * parse an XML public literal
4075  *
4076  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4077  *
4078  * Returns the PubidLiteral parsed or NULL.
4079  */
4080 
4081 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4082 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083     xmlChar *buf = NULL;
4084     int len = 0;
4085     int size = XML_PARSER_BUFFER_SIZE;
4086     xmlChar cur;
4087     xmlChar stop;
4088     int count = 0;
4089     xmlParserInputState oldstate = ctxt->instate;
4090 
4091     SHRINK;
4092     if (RAW == '"') {
4093         NEXT;
4094 	stop = '"';
4095     } else if (RAW == '\'') {
4096         NEXT;
4097 	stop = '\'';
4098     } else {
4099 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4100 	return(NULL);
4101     }
4102     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4103     if (buf == NULL) {
4104 	xmlErrMemory(ctxt, NULL);
4105 	return(NULL);
4106     }
4107     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4108     cur = CUR;
4109     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4110 	if (len + 1 >= size) {
4111 	    xmlChar *tmp;
4112 
4113 	    size *= 2;
4114 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4115 	    if (tmp == NULL) {
4116 		xmlErrMemory(ctxt, NULL);
4117 		xmlFree(buf);
4118 		return(NULL);
4119 	    }
4120 	    buf = tmp;
4121 	}
4122 	buf[len++] = cur;
4123 	count++;
4124 	if (count > 50) {
4125 	    GROW;
4126 	    count = 0;
4127 	}
4128 	NEXT;
4129 	cur = CUR;
4130 	if (cur == 0) {
4131 	    GROW;
4132 	    SHRINK;
4133 	    cur = CUR;
4134 	}
4135     }
4136     buf[len] = 0;
4137     if (cur != stop) {
4138 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4139     } else {
4140 	NEXT;
4141     }
4142     ctxt->instate = oldstate;
4143     return(buf);
4144 }
4145 
4146 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4147 
/*
 * Lookup table for the inner loop of the char data fast path: the scan
 * advances while the entry for the current byte is non-zero and stops on
 * a zero entry. Accepted bytes map to their own value.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only TAB (0x09); CR and LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: everything but '&' (0x26) */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: everything but '<' (0x3C) */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: everything but ']' (0x5D) */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80-0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4185 
4186 /**
4187  * xmlParseCharData:
4188  * @ctxt:  an XML parser context
4189  * @cdata:  int indicating whether we are within a CDATA section
4190  *
4191  * parse a CharData section.
4192  * if we are within a CDATA section ']]>' marks an end of section.
4193  *
4194  * The right angle bracket (>) may be represented using the string "&gt;",
4195  * and must, for compatibility, be escaped using "&gt;" or a character
4196  * reference when it appears in the string "]]>" in content, when that
4197  * string is not marking the end of a CDATA section.
4198  *
4199  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4200  */
4201 
void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    /*
     * Fast path for character data: scans the input bytes in place and
     * hands whole runs to the SAX callbacks without copying. Anything the
     * fast path cannot deal with (CDATA mode, or a byte that fails the
     * loop condition at the bottom) is left to xmlParseCharDataComplex().
     */
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col: position recorded on entry and refreshed after each run
     * delivered by the main scan below; written back to the input just
     * before falling through to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
	in = ctxt->input->cur;
	do {
get_more_space:
	    /* Skip a run made only of #x20 and LF, tracking line/column. */
	    while (*in == 0x20) { in++; ctxt->input->col++; }
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more_space;
	    }
	    if (*in == '<') {
		/*
		 * The text up to the next markup consists only of spaces
		 * and line feeds: deliver it and stop.
		 */
		nbchar = in - ctxt->input->cur;
		if (nbchar > 0) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if ((ctxt->sax != NULL) &&
		        (ctxt->sax->ignorableWhitespace !=
		         ctxt->sax->characters)) {
			/* Distinct handlers: let areBlanks() pick one. */
			if (areBlanks(ctxt, tmp, nbchar, 1)) {
			    if (ctxt->sax->ignorableWhitespace != NULL)
				ctxt->sax->ignorableWhitespace(ctxt->userData,
						       tmp, nbchar);
			} else {
			    if (ctxt->sax->characters != NULL)
				ctxt->sax->characters(ctxt->userData,
						      tmp, nbchar);
			    /* NOTE(review): meaning of -1 / -2 in
			     * *ctxt->space is not visible here. */
			    if (*ctxt->space == -1)
			        *ctxt->space = -2;
			}
		    } else if ((ctxt->sax != NULL) &&
		               (ctxt->sax->characters != NULL)) {
			ctxt->sax->characters(ctxt->userData,
					      tmp, nbchar);
		    }
		}
		return;
	    }

get_more:
	    /*
	     * Advance over every byte with a non-zero test_char_data entry;
	     * the column is counted locally and stored once.
	     */
            ccol = ctxt->input->col;
	    while (test_char_data[*in]) {
		in++;
		ccol++;
	    }
	    ctxt->input->col = ccol;
	    /* LF stops the table scan: count the lines and keep scanning. */
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more;
	    }
	    /* ']' stops the table scan so that "]]>" can be rejected. */
	    if (*in == ']') {
		if ((in[1] == ']') && (in[2] == '>')) {
		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
		    ctxt->input->cur = in;
		    return;
		}
		in++;
		ctxt->input->col++;
		goto get_more;
	    }
	    /* Deliver the run [ctxt->input->cur, in) to the SAX layer. */
	    nbchar = in - ctxt->input->cur;
	    if (nbchar > 0) {
		if ((ctxt->sax != NULL) &&
		    (ctxt->sax->ignorableWhitespace !=
		     ctxt->sax->characters) &&
		    (IS_BLANK_CH(*ctxt->input->cur))) {
		    /* Run starts with a blank: may be ignorable whitespace. */
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
		        if (ctxt->sax->ignorableWhitespace != NULL)
			    ctxt->sax->ignorableWhitespace(ctxt->userData,
							   tmp, nbchar);
		    } else {
		        if (ctxt->sax->characters != NULL)
			    ctxt->sax->characters(ctxt->userData,
						  tmp, nbchar);
			if (*ctxt->space == -1)
			    *ctxt->space = -2;
		    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		} else if (ctxt->sax != NULL) {
		    if (ctxt->sax->characters != NULL)
			ctxt->sax->characters(ctxt->userData,
					      ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		}
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
	    }
	    ctxt->input->cur = in;
	    if (*in == 0xD) {
		in++;
		if (*in == 0xA) {
		    /*
		     * CR LF: restart the next run at the LF so the CR is
		     * left out of the delivered data.
		     */
		    ctxt->input->cur = in;
		    in++;
		    ctxt->input->line++; ctxt->input->col = 1;
		    continue; /* while */
		}
		/* Lone CR: step back and let the checks below see it. */
		in--;
	    }
	    /* Markup or a reference ends the character data. */
	    if (*in == '<') {
		return;
	    }
	    if (*in == '&') {
		return;
	    }
	    SHRINK;
	    GROW;
	    in = ctxt->input->cur;
	    /* Stay on the fast path only for TAB and bytes 0x20..0x7F. */
	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
	nbchar = 0;
    }
    /* Put back the last recorded position before the slow path runs. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4341 
4342 /**
4343  * xmlParseCharDataComplex:
4344  * @ctxt:  an XML parser context
4345  * @cdata:  int indicating whether we are within a CDATA section
4346  *
 * Parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
4350  */
4351 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4352 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4353     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4354     int nbchar = 0;
4355     int cur, l;
4356     int count = 0;
4357 
4358     SHRINK;
4359     GROW;
4360     cur = CUR_CHAR(l);
4361     while ((cur != '<') && /* checked */
4362            (cur != '&') &&
4363 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4364 	if ((cur == ']') && (NXT(1) == ']') &&
4365 	    (NXT(2) == '>')) {
4366 	    if (cdata) break;
4367 	    else {
4368 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4369 	    }
4370 	}
4371 	COPY_BUF(l,buf,nbchar,cur);
4372 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4373 	    buf[nbchar] = 0;
4374 
4375 	    /*
4376 	     * OK the segment is to be consumed as chars.
4377 	     */
4378 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4379 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4380 		    if (ctxt->sax->ignorableWhitespace != NULL)
4381 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4382 			                               buf, nbchar);
4383 		} else {
4384 		    if (ctxt->sax->characters != NULL)
4385 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4386 		    if ((ctxt->sax->characters !=
4387 		         ctxt->sax->ignorableWhitespace) &&
4388 			(*ctxt->space == -1))
4389 			*ctxt->space = -2;
4390 		}
4391 	    }
4392 	    nbchar = 0;
4393             /* something really bad happened in the SAX callback */
4394             if (ctxt->instate != XML_PARSER_CONTENT)
4395                 return;
4396 	}
4397 	count++;
4398 	if (count > 50) {
4399 	    GROW;
4400 	    count = 0;
4401 	}
4402 	NEXTL(l);
4403 	cur = CUR_CHAR(l);
4404     }
4405     if (nbchar != 0) {
4406         buf[nbchar] = 0;
4407 	/*
4408 	 * OK the segment is to be consumed as chars.
4409 	 */
4410 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4411 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4412 		if (ctxt->sax->ignorableWhitespace != NULL)
4413 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4414 	    } else {
4415 		if (ctxt->sax->characters != NULL)
4416 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4417 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418 		    (*ctxt->space == -1))
4419 		    *ctxt->space = -2;
4420 	    }
4421 	}
4422     }
4423     if ((cur != 0) && (!IS_CHAR(cur))) {
4424 	/* Generate the error and skip the offending character */
4425         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426                           "PCDATA invalid Char value %d\n",
4427 	                  cur);
4428 	NEXTL(l);
4429     }
4430 }
4431 
4432 /**
4433  * xmlParseExternalID:
4434  * @ctxt:  an XML parser context
4435  * @publicID:  a xmlChar** receiving PubidLiteral
4436  * @strict: indicate whether we should restrict parsing to only
4437  *          production [75], see NOTE below
4438  *
4439  * Parse an External ID or a Public ID
4440  *
4441  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4442  *       'PUBLIC' S PubidLiteral S SystemLiteral
4443  *
4444  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4446  *
4447  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4448  *
 * Returns the SystemLiteral; in the 'PUBLIC' case @publicID also receives
 *                the PubidLiteral. If @strict is off it is possible to
 *                return NULL and still have @publicID set.
4452  */
4453 
4454 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4455 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456     xmlChar *URI = NULL;
4457 
4458     SHRINK;
4459 
4460     *publicID = NULL;
4461     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4462         SKIP(6);
4463 	if (!IS_BLANK_CH(CUR)) {
4464 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465 	                   "Space required after 'SYSTEM'\n");
4466 	}
4467         SKIP_BLANKS;
4468 	URI = xmlParseSystemLiteral(ctxt);
4469 	if (URI == NULL) {
4470 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4471         }
4472     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4473         SKIP(6);
4474 	if (!IS_BLANK_CH(CUR)) {
4475 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476 		    "Space required after 'PUBLIC'\n");
4477 	}
4478         SKIP_BLANKS;
4479 	*publicID = xmlParsePubidLiteral(ctxt);
4480 	if (*publicID == NULL) {
4481 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4482 	}
4483 	if (strict) {
4484 	    /*
4485 	     * We don't handle [83] so "S SystemLiteral" is required.
4486 	     */
4487 	    if (!IS_BLANK_CH(CUR)) {
4488 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 			"Space required after the Public Identifier\n");
4490 	    }
4491 	} else {
4492 	    /*
4493 	     * We handle [83] so we return immediately, if
4494 	     * "S SystemLiteral" is not detected. From a purely parsing
4495 	     * point of view that's a nice mess.
4496 	     */
4497 	    const xmlChar *ptr;
4498 	    GROW;
4499 
4500 	    ptr = CUR_PTR;
4501 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4502 
4503 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4504 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4505 	}
4506         SKIP_BLANKS;
4507 	URI = xmlParseSystemLiteral(ctxt);
4508 	if (URI == NULL) {
4509 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4510         }
4511     }
4512     return(URI);
4513 }
4514 
4515 /**
4516  * xmlParseCommentComplex:
4517  * @ctxt:  an XML parser context
4518  * @buf:  the already parsed part of the buffer
 * @len:  number of bytes filled in the buffer
4520  * @size:  allocated size of the buffer
4521  *
4522  * Skip an XML (SGML) comment <!-- .... -->
4523  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4524  *  must not occur within comments. "
4525  * This is the slow routine in case the accelerator for ascii didn't work
4526  *
4527  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4528  */
4529 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4530 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4531     int q, ql;
4532     int r, rl;
4533     int cur, l;
4534     int count = 0;
4535     int inputid;
4536 
4537     inputid = ctxt->input->id;
4538 
4539     if (buf == NULL) {
4540         len = 0;
4541 	size = XML_PARSER_BUFFER_SIZE;
4542 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4543 	if (buf == NULL) {
4544 	    xmlErrMemory(ctxt, NULL);
4545 	    return;
4546 	}
4547     }
4548     GROW;	/* Assure there's enough input data */
4549     q = CUR_CHAR(ql);
4550     if (q == 0)
4551         goto not_terminated;
4552     if (!IS_CHAR(q)) {
4553         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554                           "xmlParseComment: invalid xmlChar value %d\n",
4555 	                  q);
4556 	xmlFree (buf);
4557 	return;
4558     }
4559     NEXTL(ql);
4560     r = CUR_CHAR(rl);
4561     if (r == 0)
4562         goto not_terminated;
4563     if (!IS_CHAR(r)) {
4564         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565                           "xmlParseComment: invalid xmlChar value %d\n",
4566 	                  q);
4567 	xmlFree (buf);
4568 	return;
4569     }
4570     NEXTL(rl);
4571     cur = CUR_CHAR(l);
4572     if (cur == 0)
4573         goto not_terminated;
4574     while (IS_CHAR(cur) && /* checked */
4575            ((cur != '>') ||
4576 	    (r != '-') || (q != '-'))) {
4577 	if ((r == '-') && (q == '-')) {
4578 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4579 	}
4580 	if (len + 5 >= size) {
4581 	    xmlChar *new_buf;
4582 	    size *= 2;
4583 	    new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584 	    if (new_buf == NULL) {
4585 		xmlFree (buf);
4586 		xmlErrMemory(ctxt, NULL);
4587 		return;
4588 	    }
4589 	    buf = new_buf;
4590 	}
4591 	COPY_BUF(ql,buf,len,q);
4592 	q = r;
4593 	ql = rl;
4594 	r = cur;
4595 	rl = l;
4596 
4597 	count++;
4598 	if (count > 50) {
4599 	    GROW;
4600 	    count = 0;
4601 	}
4602 	NEXTL(l);
4603 	cur = CUR_CHAR(l);
4604 	if (cur == 0) {
4605 	    SHRINK;
4606 	    GROW;
4607 	    cur = CUR_CHAR(l);
4608 	}
4609     }
4610     buf[len] = 0;
4611     if (cur == 0) {
4612 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613 	                     "Comment not terminated \n<!--%.50s\n", buf);
4614     } else if (!IS_CHAR(cur)) {
4615         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616                           "xmlParseComment: invalid xmlChar value %d\n",
4617 	                  cur);
4618     } else {
4619 	if (inputid != ctxt->input->id) {
4620 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621 		"Comment doesn't start and stop in the same entity\n");
4622 	}
4623         NEXT;
4624 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625 	    (!ctxt->disableSAX))
4626 	    ctxt->sax->comment(ctxt->userData, buf);
4627     }
4628     xmlFree(buf);
4629     return;
4630 not_terminated:
4631     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632 			 "Comment not terminated\n", NULL);
4633     xmlFree(buf);
4634     return;
4635 }
4636 
4637 /**
4638  * xmlParseComment:
4639  * @ctxt:  an XML parser context
4640  *
4641  * Skip an XML (SGML) comment <!-- .... -->
4642  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4643  *  must not occur within comments. "
4644  *
4645  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4646  */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    /*
     * Fast path for comments: scans plain ASCII directly in the input
     * buffer and accumulates it in buf; anything else is handed over,
     * together with what was collected so far, to xmlParseCommentComplex().
     */
    xmlChar *buf = NULL;
    int size = XML_PARSER_BUFFER_SIZE;
    int len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    int nbchar = 0, ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* Remember the state and the input the comment started in. */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
	/* Count leading line feeds. */
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	}
get_more:
	/*
	 * Advance over TAB and bytes 0x20..0x7F except '-'; the column is
	 * counted locally and stored once.
	 */
        ccol = ctxt->input->col;
	while (((*in > '-') && (*in <= 0x7F)) ||
	       ((*in >= 0x20) && (*in < '-')) ||
	       (*in == 0x09)) {
		    in++;
		    ccol++;
	}
	ctxt->input->col = ccol;
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	    goto get_more;
	}
	nbchar = in - ctxt->input->cur;
	/*
	 * save current set of data
	 */
	if (nbchar > 0) {
	    /* Only collect the text when a comment handler will get it. */
	    if ((ctxt->sax != NULL) &&
		(ctxt->sax->comment != NULL)) {
		if (buf == NULL) {
		    /*
		     * First run: if "--" follows, size the buffer for
		     * exactly this run plus the terminator, otherwise
		     * leave headroom for more.
		     */
		    if ((*in == '-') && (in[1] == '-'))
		        size = nbchar + 1;
		    else
		        size = XML_PARSER_BUFFER_SIZE + nbchar;
		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
		    if (buf == NULL) {
		        xmlErrMemory(ctxt, NULL);
			ctxt->instate = state;
			return;
		    }
		    len = 0;
		} else if (len + nbchar + 1 >= size) {
		    xmlChar *new_buf;
		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
		    new_buf = (xmlChar *) xmlRealloc(buf,
		                                     size * sizeof(xmlChar));
		    if (new_buf == NULL) {
		        xmlFree (buf);
			xmlErrMemory(ctxt, NULL);
			ctxt->instate = state;
			return;
		    }
		    buf = new_buf;
		}
		/* Append the run and keep the buffer 0-terminated. */
		memcpy(&buf[len], ctxt->input->cur, nbchar);
		len += nbchar;
		buf[len] = 0;
	    }
	}
	ctxt->input->cur = in;
	if (*in == 0xA) {
	    in++;
	    ctxt->input->line++; ctxt->input->col = 1;
	}
	if (*in == 0xD) {
	    in++;
	    if (*in == 0xA) {
		/*
		 * CR LF: restart the next run at the LF so the CR is
		 * left out of the collected text.
		 */
		ctxt->input->cur = in;
		in++;
		ctxt->input->line++; ctxt->input->col = 1;
		continue; /* while */
	    }
	    /* Lone CR: step back. */
	    in--;
	}
	SHRINK;
	GROW;
	in = ctxt->input->cur;
	if (*in == '-') {
	    if (in[1] == '-') {
	        if (in[2] == '>') {
		    /* "-->": end of the comment. */
		    if (ctxt->input->id != inputid) {
			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
			"comment doesn't start and stop in the same entity\n");
		    }
		    SKIP(3);
		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
		        (!ctxt->disableSAX)) {
			/* An empty comment is reported as "". */
			if (buf != NULL)
			    ctxt->sax->comment(ctxt->userData, buf);
			else
			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
		    }
		    if (buf != NULL)
		        xmlFree(buf);
		    ctxt->instate = state;
		    return;
		}
		/* "--" not followed by '>': report it and keep scanning. */
		if (buf != NULL)
		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
		                      "Comment not terminated \n<!--%.50s\n",
				      buf);
		else
		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
		                      "Comment not terminated \n", NULL);
		in++;
		ctxt->input->col++;
	    }
	    /*
	     * Step over the hyphen(s); ctxt->input->cur is left behind so
	     * they become part of the next collected run.
	     */
	    in++;
	    ctxt->input->col++;
	    goto get_more;
	}
	/* Stay on the fast path only for TAB and bytes 0x20..0x7F. */
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* Slow path; xmlParseCommentComplex() releases buf itself. */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4791 
4792 
4793 /**
4794  * xmlParsePITarget:
4795  * @ctxt:  an XML parser context
4796  *
4797  * parse the name of a PI
4798  *
4799  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4800  *
4801  * Returns the PITarget name or NULL
4802  */
4803 
4804 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4805 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4806     const xmlChar *name;
4807 
4808     name = xmlParseName(ctxt);
4809     if ((name != NULL) &&
4810         ((name[0] == 'x') || (name[0] == 'X')) &&
4811         ((name[1] == 'm') || (name[1] == 'M')) &&
4812         ((name[2] == 'l') || (name[2] == 'L'))) {
4813 	int i;
4814 	if ((name[0] == 'x') && (name[1] == 'm') &&
4815 	    (name[2] == 'l') && (name[3] == 0)) {
4816 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4817 		 "XML declaration allowed only at the start of the document\n");
4818 	    return(name);
4819 	} else if (name[3] == 0) {
4820 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4821 	    return(name);
4822 	}
4823 	for (i = 0;;i++) {
4824 	    if (xmlW3CPIs[i] == NULL) break;
4825 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4826 	        return(name);
4827 	}
4828 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
4830 		      NULL, NULL);
4831     }
4832     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834 		 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4835     }
4836     return(name);
4837 }
4838 
4839 #ifdef LIBXML_CATALOG_ENABLED
4840 /**
4841  * xmlParseCatalogPI:
4842  * @ctxt:  an XML parser context
4843  * @catalog:  the PI value string
4844  *
4845  * parse an XML Catalog Processing Instruction.
4846  *
4847  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4848  *
4849  * Occurs only if allowed by the user and if happening in the Misc
4850  * part of the document before any doctype informations
4851  * This will add the given catalog to the parsing context in order
4852  * to be used if there is a resolution need further down in the document
4853  */
4854 
4855 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)4856 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4857     xmlChar *URL = NULL;
4858     const xmlChar *tmp, *base;
4859     xmlChar marker;
4860 
4861     tmp = catalog;
4862     while (IS_BLANK_CH(*tmp)) tmp++;
4863     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4864 	goto error;
4865     tmp += 7;
4866     while (IS_BLANK_CH(*tmp)) tmp++;
4867     if (*tmp != '=') {
4868 	return;
4869     }
4870     tmp++;
4871     while (IS_BLANK_CH(*tmp)) tmp++;
4872     marker = *tmp;
4873     if ((marker != '\'') && (marker != '"'))
4874 	goto error;
4875     tmp++;
4876     base = tmp;
4877     while ((*tmp != 0) && (*tmp != marker)) tmp++;
4878     if (*tmp == 0)
4879 	goto error;
4880     URL = xmlStrndup(base, tmp - base);
4881     tmp++;
4882     while (IS_BLANK_CH(*tmp)) tmp++;
4883     if (*tmp != 0)
4884 	goto error;
4885 
4886     if (URL != NULL) {
4887 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4888 	xmlFree(URL);
4889     }
4890     return;
4891 
4892 error:
4893     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4894 	          "Catalog PI syntax error: %s\n",
4895 		  catalog, NULL);
4896     if (URL != NULL)
4897 	xmlFree(URL);
4898 }
4899 #endif
4900 
4901 /**
4902  * xmlParsePI:
4903  * @ctxt:  an XML parser context
4904  *
4905  * parse an XML Processing Instruction.
4906  *
4907  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4908  *
4909  * The processing is transfered to SAX once parsed.
4910  */
4911 
4912 void
xmlParsePI(xmlParserCtxtPtr ctxt)4913 xmlParsePI(xmlParserCtxtPtr ctxt) {
4914     xmlChar *buf = NULL;
4915     int len = 0;
4916     int size = XML_PARSER_BUFFER_SIZE;
4917     int cur, l;
4918     const xmlChar *target;
4919     xmlParserInputState state;
4920     int count = 0;
4921 
4922     if ((RAW == '<') && (NXT(1) == '?')) {
4923 	xmlParserInputPtr input = ctxt->input;
4924 	state = ctxt->instate;
4925         ctxt->instate = XML_PARSER_PI;
4926 	/*
4927 	 * this is a Processing Instruction.
4928 	 */
4929 	SKIP(2);
4930 	SHRINK;
4931 
4932 	/*
4933 	 * Parse the target name and check for special support like
4934 	 * namespace.
4935 	 */
4936         target = xmlParsePITarget(ctxt);
4937 	if (target != NULL) {
4938 	    if ((RAW == '?') && (NXT(1) == '>')) {
4939 		if (input != ctxt->input) {
4940 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941 	    "PI declaration doesn't start and stop in the same entity\n");
4942 		}
4943 		SKIP(2);
4944 
4945 		/*
4946 		 * SAX: PI detected.
4947 		 */
4948 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949 		    (ctxt->sax->processingInstruction != NULL))
4950 		    ctxt->sax->processingInstruction(ctxt->userData,
4951 		                                     target, NULL);
4952 		ctxt->instate = state;
4953 		return;
4954 	    }
4955 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4956 	    if (buf == NULL) {
4957 		xmlErrMemory(ctxt, NULL);
4958 		ctxt->instate = state;
4959 		return;
4960 	    }
4961 	    cur = CUR;
4962 	    if (!IS_BLANK(cur)) {
4963 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964 			  "ParsePI: PI %s space expected\n", target);
4965 	    }
4966             SKIP_BLANKS;
4967 	    cur = CUR_CHAR(l);
4968 	    while (IS_CHAR(cur) && /* checked */
4969 		   ((cur != '?') || (NXT(1) != '>'))) {
4970 		if (len + 5 >= size) {
4971 		    xmlChar *tmp;
4972 
4973 		    size *= 2;
4974 		    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4975 		    if (tmp == NULL) {
4976 			xmlErrMemory(ctxt, NULL);
4977 			xmlFree(buf);
4978 			ctxt->instate = state;
4979 			return;
4980 		    }
4981 		    buf = tmp;
4982 		}
4983 		count++;
4984 		if (count > 50) {
4985 		    GROW;
4986 		    count = 0;
4987 		}
4988 		COPY_BUF(l,buf,len,cur);
4989 		NEXTL(l);
4990 		cur = CUR_CHAR(l);
4991 		if (cur == 0) {
4992 		    SHRINK;
4993 		    GROW;
4994 		    cur = CUR_CHAR(l);
4995 		}
4996 	    }
4997 	    buf[len] = 0;
4998 	    if (cur != '?') {
4999 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000 		      "ParsePI: PI %s never end ...\n", target);
5001 	    } else {
5002 		if (input != ctxt->input) {
5003 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004 	    "PI declaration doesn't start and stop in the same entity\n");
5005 		}
5006 		SKIP(2);
5007 
5008 #ifdef LIBXML_CATALOG_ENABLED
5009 		if (((state == XML_PARSER_MISC) ||
5010 	             (state == XML_PARSER_START)) &&
5011 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5012 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014 			(allow == XML_CATA_ALLOW_ALL))
5015 			xmlParseCatalogPI(ctxt, buf);
5016 		}
5017 #endif
5018 
5019 
5020 		/*
5021 		 * SAX: PI detected.
5022 		 */
5023 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024 		    (ctxt->sax->processingInstruction != NULL))
5025 		    ctxt->sax->processingInstruction(ctxt->userData,
5026 		                                     target, buf);
5027 	    }
5028 	    xmlFree(buf);
5029 	} else {
5030 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5031 	}
5032 	ctxt->instate = state;
5033     }
5034 }
5035 
5036 /**
5037  * xmlParseNotationDecl:
5038  * @ctxt:  an XML parser context
5039  *
5040  * parse a notation declaration
5041  *
5042  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5043  *
5044  * Hence there is actually 3 choices:
5045  *     'PUBLIC' S PubidLiteral
5046  *     'PUBLIC' S PubidLiteral S SystemLiteral
5047  * and 'SYSTEM' S SystemLiteral
5048  *
5049  * See the NOTE on xmlParseExternalID().
5050  */
5051 
5052 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5053 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5054     const xmlChar *name;
5055     xmlChar *Pubid;
5056     xmlChar *Systemid;
5057 
5058     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5059 	xmlParserInputPtr input = ctxt->input;
5060 	SHRINK;
5061 	SKIP(10);
5062 	if (!IS_BLANK_CH(CUR)) {
5063 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064 			   "Space required after '<!NOTATION'\n");
5065 	    return;
5066 	}
5067 	SKIP_BLANKS;
5068 
5069         name = xmlParseName(ctxt);
5070 	if (name == NULL) {
5071 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5072 	    return;
5073 	}
5074 	if (!IS_BLANK_CH(CUR)) {
5075 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076 		     "Space required after the NOTATION name'\n");
5077 	    return;
5078 	}
5079 	if (xmlStrchr(name, ':') != NULL) {
5080 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081 		     "colon are forbidden from notation names '%s'\n",
5082 		     name, NULL, NULL);
5083 	}
5084 	SKIP_BLANKS;
5085 
5086 	/*
5087 	 * Parse the IDs.
5088 	 */
5089 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5090 	SKIP_BLANKS;
5091 
5092 	if (RAW == '>') {
5093 	    if (input != ctxt->input) {
5094 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095 	"Notation declaration doesn't start and stop in the same entity\n");
5096 	    }
5097 	    NEXT;
5098 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099 		(ctxt->sax->notationDecl != NULL))
5100 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5101 	} else {
5102 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5103 	}
5104 	if (Systemid != NULL) xmlFree(Systemid);
5105 	if (Pubid != NULL) xmlFree(Pubid);
5106     }
5107 }
5108 
5109 /**
5110  * xmlParseEntityDecl:
5111  * @ctxt:  an XML parser context
5112  *
5113  * parse <!ENTITY declarations
5114  *
5115  * [70] EntityDecl ::= GEDecl | PEDecl
5116  *
5117  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5118  *
5119  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5120  *
5121  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5122  *
5123  * [74] PEDef ::= EntityValue | ExternalID
5124  *
5125  * [76] NDataDecl ::= S 'NDATA' S Name
5126  *
5127  * [ VC: Notation Declared ]
5128  * The Name must match the declared name of a notation.
5129  */
5130 
5131 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5132 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5133     const xmlChar *name = NULL;
5134     xmlChar *value = NULL;
5135     xmlChar *URI = NULL, *literal = NULL;
5136     const xmlChar *ndata = NULL;
5137     int isParameter = 0;
5138     xmlChar *orig = NULL;
5139     int skipped;
5140 
5141     /* GROW; done in the caller */
5142     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5143 	xmlParserInputPtr input = ctxt->input;
5144 	SHRINK;
5145 	SKIP(8);
5146 	skipped = SKIP_BLANKS;
5147 	if (skipped == 0) {
5148 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149 			   "Space required after '<!ENTITY'\n");
5150 	}
5151 
5152 	if (RAW == '%') {
5153 	    NEXT;
5154 	    skipped = SKIP_BLANKS;
5155 	    if (skipped == 0) {
5156 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157 			       "Space required after '%'\n");
5158 	    }
5159 	    isParameter = 1;
5160 	}
5161 
5162         name = xmlParseName(ctxt);
5163 	if (name == NULL) {
5164 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165 	                   "xmlParseEntityDecl: no name\n");
5166             return;
5167 	}
5168 	if (xmlStrchr(name, ':') != NULL) {
5169 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170 		     "colon are forbidden from entities names '%s'\n",
5171 		     name, NULL, NULL);
5172 	}
5173         skipped = SKIP_BLANKS;
5174 	if (skipped == 0) {
5175 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 			   "Space required after the entity name\n");
5177 	}
5178 
5179 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5180 	/*
5181 	 * handle the various case of definitions...
5182 	 */
5183 	if (isParameter) {
5184 	    if ((RAW == '"') || (RAW == '\'')) {
5185 	        value = xmlParseEntityValue(ctxt, &orig);
5186 		if (value) {
5187 		    if ((ctxt->sax != NULL) &&
5188 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189 			ctxt->sax->entityDecl(ctxt->userData, name,
5190 		                    XML_INTERNAL_PARAMETER_ENTITY,
5191 				    NULL, NULL, value);
5192 		}
5193 	    } else {
5194 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5195 		if ((URI == NULL) && (literal == NULL)) {
5196 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5197 		}
5198 		if (URI) {
5199 		    xmlURIPtr uri;
5200 
5201 		    uri = xmlParseURI((const char *) URI);
5202 		    if (uri == NULL) {
5203 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204 				     "Invalid URI: %s\n", URI);
5205 			/*
5206 			 * This really ought to be a well formedness error
5207 			 * but the XML Core WG decided otherwise c.f. issue
5208 			 * E26 of the XML erratas.
5209 			 */
5210 		    } else {
5211 			if (uri->fragment != NULL) {
5212 			    /*
5213 			     * Okay this is foolish to block those but not
5214 			     * invalid URIs.
5215 			     */
5216 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5217 			} else {
5218 			    if ((ctxt->sax != NULL) &&
5219 				(!ctxt->disableSAX) &&
5220 				(ctxt->sax->entityDecl != NULL))
5221 				ctxt->sax->entityDecl(ctxt->userData, name,
5222 					    XML_EXTERNAL_PARAMETER_ENTITY,
5223 					    literal, URI, NULL);
5224 			}
5225 			xmlFreeURI(uri);
5226 		    }
5227 		}
5228 	    }
5229 	} else {
5230 	    if ((RAW == '"') || (RAW == '\'')) {
5231 	        value = xmlParseEntityValue(ctxt, &orig);
5232 		if ((ctxt->sax != NULL) &&
5233 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234 		    ctxt->sax->entityDecl(ctxt->userData, name,
5235 				XML_INTERNAL_GENERAL_ENTITY,
5236 				NULL, NULL, value);
5237 		/*
5238 		 * For expat compatibility in SAX mode.
5239 		 */
5240 		if ((ctxt->myDoc == NULL) ||
5241 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242 		    if (ctxt->myDoc == NULL) {
5243 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5244 			if (ctxt->myDoc == NULL) {
5245 			    xmlErrMemory(ctxt, "New Doc failed");
5246 			    return;
5247 			}
5248 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5249 		    }
5250 		    if (ctxt->myDoc->intSubset == NULL)
5251 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252 					    BAD_CAST "fake", NULL, NULL);
5253 
5254 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5255 			              NULL, NULL, value);
5256 		}
5257 	    } else {
5258 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5259 		if ((URI == NULL) && (literal == NULL)) {
5260 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5261 		}
5262 		if (URI) {
5263 		    xmlURIPtr uri;
5264 
5265 		    uri = xmlParseURI((const char *)URI);
5266 		    if (uri == NULL) {
5267 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268 				     "Invalid URI: %s\n", URI);
5269 			/*
5270 			 * This really ought to be a well formedness error
5271 			 * but the XML Core WG decided otherwise c.f. issue
5272 			 * E26 of the XML erratas.
5273 			 */
5274 		    } else {
5275 			if (uri->fragment != NULL) {
5276 			    /*
5277 			     * Okay this is foolish to block those but not
5278 			     * invalid URIs.
5279 			     */
5280 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5281 			}
5282 			xmlFreeURI(uri);
5283 		    }
5284 		}
5285 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5286 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287 				   "Space required before 'NDATA'\n");
5288 		}
5289 		SKIP_BLANKS;
5290 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5291 		    SKIP(5);
5292 		    if (!IS_BLANK_CH(CUR)) {
5293 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 				       "Space required after 'NDATA'\n");
5295 		    }
5296 		    SKIP_BLANKS;
5297 		    ndata = xmlParseName(ctxt);
5298 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 		        (ctxt->sax->unparsedEntityDecl != NULL))
5300 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301 				    literal, URI, ndata);
5302 		} else {
5303 		    if ((ctxt->sax != NULL) &&
5304 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305 			ctxt->sax->entityDecl(ctxt->userData, name,
5306 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307 				    literal, URI, NULL);
5308 		    /*
5309 		     * For expat compatibility in SAX mode.
5310 		     * assuming the entity repalcement was asked for
5311 		     */
5312 		    if ((ctxt->replaceEntities != 0) &&
5313 			((ctxt->myDoc == NULL) ||
5314 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315 			if (ctxt->myDoc == NULL) {
5316 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5317 			    if (ctxt->myDoc == NULL) {
5318 			        xmlErrMemory(ctxt, "New Doc failed");
5319 				return;
5320 			    }
5321 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5322 			}
5323 
5324 			if (ctxt->myDoc->intSubset == NULL)
5325 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326 						BAD_CAST "fake", NULL, NULL);
5327 			xmlSAX2EntityDecl(ctxt, name,
5328 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329 				          literal, URI, NULL);
5330 		    }
5331 		}
5332 	    }
5333 	}
5334 	SKIP_BLANKS;
5335 	if (RAW != '>') {
5336 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5337 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5338 	} else {
5339 	    if (input != ctxt->input) {
5340 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341 	"Entity declaration doesn't start and stop in the same entity\n");
5342 	    }
5343 	    NEXT;
5344 	}
5345 	if (orig != NULL) {
5346 	    /*
5347 	     * Ugly mechanism to save the raw entity value.
5348 	     */
5349 	    xmlEntityPtr cur = NULL;
5350 
5351 	    if (isParameter) {
5352 	        if ((ctxt->sax != NULL) &&
5353 		    (ctxt->sax->getParameterEntity != NULL))
5354 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5355 	    } else {
5356 	        if ((ctxt->sax != NULL) &&
5357 		    (ctxt->sax->getEntity != NULL))
5358 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5359 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5360 		    cur = xmlSAX2GetEntity(ctxt, name);
5361 		}
5362 	    }
5363             if (cur != NULL) {
5364 	        if (cur->orig != NULL)
5365 		    xmlFree(orig);
5366 		else
5367 		    cur->orig = orig;
5368 	    } else
5369 		xmlFree(orig);
5370 	}
5371 	if (value != NULL) xmlFree(value);
5372 	if (URI != NULL) xmlFree(URI);
5373 	if (literal != NULL) xmlFree(literal);
5374     }
5375 }
5376 
5377 /**
5378  * xmlParseDefaultDecl:
5379  * @ctxt:  an XML parser context
5380  * @value:  Receive a possible fixed default value for the attribute
5381  *
5382  * Parse an attribute default declaration
5383  *
5384  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5385  *
5386  * [ VC: Required Attribute ]
5387  * if the default declaration is the keyword #REQUIRED, then the
5388  * attribute must be specified for all elements of the type in the
5389  * attribute-list declaration.
5390  *
5391  * [ VC: Attribute Default Legal ]
5392  * The declared default value must meet the lexical constraints of
5393  * the declared attribute type c.f. xmlValidateAttributeDecl()
5394  *
5395  * [ VC: Fixed Attribute Default ]
5396  * if an attribute has a default value declared with the #FIXED
5397  * keyword, instances of that attribute must match the default value.
5398  *
5399  * [ WFC: No < in Attribute Values ]
5400  * handled in xmlParseAttValue()
5401  *
5402  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403  *          or XML_ATTRIBUTE_FIXED.
5404  */
5405 
5406 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5407 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5408     int val;
5409     xmlChar *ret;
5410 
5411     *value = NULL;
5412     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5413 	SKIP(9);
5414 	return(XML_ATTRIBUTE_REQUIRED);
5415     }
5416     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5417 	SKIP(8);
5418 	return(XML_ATTRIBUTE_IMPLIED);
5419     }
5420     val = XML_ATTRIBUTE_NONE;
5421     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5422 	SKIP(6);
5423 	val = XML_ATTRIBUTE_FIXED;
5424 	if (!IS_BLANK_CH(CUR)) {
5425 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426 			   "Space required after '#FIXED'\n");
5427 	}
5428 	SKIP_BLANKS;
5429     }
5430     ret = xmlParseAttValue(ctxt);
5431     ctxt->instate = XML_PARSER_DTD;
5432     if (ret == NULL) {
5433 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5434 		       "Attribute default value declaration error\n");
5435     } else
5436         *value = ret;
5437     return(val);
5438 }
5439 
5440 /**
5441  * xmlParseNotationType:
5442  * @ctxt:  an XML parser context
5443  *
5444  * parse an Notation attribute type.
5445  *
5446  * Note: the leading 'NOTATION' S part has already being parsed...
5447  *
5448  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5449  *
5450  * [ VC: Notation Attributes ]
5451  * Values of this type must match one of the notation names included
5452  * in the declaration; all notation names in the declaration must be declared.
5453  *
5454  * Returns: the notation attribute tree built while parsing
5455  */
5456 
5457 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5458 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5459     const xmlChar *name;
5460     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5461 
5462     if (RAW != '(') {
5463 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5464 	return(NULL);
5465     }
5466     SHRINK;
5467     do {
5468         NEXT;
5469 	SKIP_BLANKS;
5470         name = xmlParseName(ctxt);
5471 	if (name == NULL) {
5472 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473 			   "Name expected in NOTATION declaration\n");
5474             xmlFreeEnumeration(ret);
5475 	    return(NULL);
5476 	}
5477 	tmp = ret;
5478 	while (tmp != NULL) {
5479 	    if (xmlStrEqual(name, tmp->name)) {
5480 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481 	  "standalone: attribute notation value token %s duplicated\n",
5482 				 name, NULL);
5483 		if (!xmlDictOwns(ctxt->dict, name))
5484 		    xmlFree((xmlChar *) name);
5485 		break;
5486 	    }
5487 	    tmp = tmp->next;
5488 	}
5489 	if (tmp == NULL) {
5490 	    cur = xmlCreateEnumeration(name);
5491 	    if (cur == NULL) {
5492                 xmlFreeEnumeration(ret);
5493                 return(NULL);
5494             }
5495 	    if (last == NULL) ret = last = cur;
5496 	    else {
5497 		last->next = cur;
5498 		last = cur;
5499 	    }
5500 	}
5501 	SKIP_BLANKS;
5502     } while (RAW == '|');
5503     if (RAW != ')') {
5504 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5505         xmlFreeEnumeration(ret);
5506 	return(NULL);
5507     }
5508     NEXT;
5509     return(ret);
5510 }
5511 
5512 /**
5513  * xmlParseEnumerationType:
5514  * @ctxt:  an XML parser context
5515  *
5516  * parse an Enumeration attribute type.
5517  *
5518  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5519  *
5520  * [ VC: Enumeration ]
5521  * Values of this type must match one of the Nmtoken tokens in
5522  * the declaration
5523  *
5524  * Returns: the enumeration attribute tree built while parsing
5525  */
5526 
5527 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5528 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5529     xmlChar *name;
5530     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5531 
5532     if (RAW != '(') {
5533 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5534 	return(NULL);
5535     }
5536     SHRINK;
5537     do {
5538         NEXT;
5539 	SKIP_BLANKS;
5540         name = xmlParseNmtoken(ctxt);
5541 	if (name == NULL) {
5542 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5543 	    return(ret);
5544 	}
5545 	tmp = ret;
5546 	while (tmp != NULL) {
5547 	    if (xmlStrEqual(name, tmp->name)) {
5548 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549 	  "standalone: attribute enumeration value token %s duplicated\n",
5550 				 name, NULL);
5551 		if (!xmlDictOwns(ctxt->dict, name))
5552 		    xmlFree(name);
5553 		break;
5554 	    }
5555 	    tmp = tmp->next;
5556 	}
5557 	if (tmp == NULL) {
5558 	    cur = xmlCreateEnumeration(name);
5559 	    if (!xmlDictOwns(ctxt->dict, name))
5560 		xmlFree(name);
5561 	    if (cur == NULL) {
5562                 xmlFreeEnumeration(ret);
5563                 return(NULL);
5564             }
5565 	    if (last == NULL) ret = last = cur;
5566 	    else {
5567 		last->next = cur;
5568 		last = cur;
5569 	    }
5570 	}
5571 	SKIP_BLANKS;
5572     } while (RAW == '|');
5573     if (RAW != ')') {
5574 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5575 	return(ret);
5576     }
5577     NEXT;
5578     return(ret);
5579 }
5580 
5581 /**
5582  * xmlParseEnumeratedType:
5583  * @ctxt:  an XML parser context
5584  * @tree:  the enumeration tree built while parsing
5585  *
5586  * parse an Enumerated attribute type.
5587  *
5588  * [57] EnumeratedType ::= NotationType | Enumeration
5589  *
5590  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5591  *
5592  *
5593  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5594  */
5595 
5596 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5597 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5598     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5599 	SKIP(8);
5600 	if (!IS_BLANK_CH(CUR)) {
5601 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602 			   "Space required after 'NOTATION'\n");
5603 	    return(0);
5604 	}
5605         SKIP_BLANKS;
5606 	*tree = xmlParseNotationType(ctxt);
5607 	if (*tree == NULL) return(0);
5608 	return(XML_ATTRIBUTE_NOTATION);
5609     }
5610     *tree = xmlParseEnumerationType(ctxt);
5611     if (*tree == NULL) return(0);
5612     return(XML_ATTRIBUTE_ENUMERATION);
5613 }
5614 
5615 /**
5616  * xmlParseAttributeType:
5617  * @ctxt:  an XML parser context
5618  * @tree:  the enumeration tree built while parsing
5619  *
5620  * parse the Attribute list def for an element
5621  *
5622  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5623  *
5624  * [55] StringType ::= 'CDATA'
5625  *
5626  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5628  *
5629  * Validity constraints for attribute values syntax are checked in
5630  * xmlValidateAttributeValue()
5631  *
5632  * [ VC: ID ]
5633  * Values of type ID must match the Name production. A name must not
5634  * appear more than once in an XML document as a value of this type;
5635  * i.e., ID values must uniquely identify the elements which bear them.
5636  *
5637  * [ VC: One ID per Element Type ]
5638  * No element type may have more than one ID attribute specified.
5639  *
5640  * [ VC: ID Attribute Default ]
5641  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5642  *
5643  * [ VC: IDREF ]
5644  * Values of type IDREF must match the Name production, and values
5645  * of type IDREFS must match Names; each IDREF Name must match the value
5646  * of an ID attribute on some element in the XML document; i.e. IDREF
5647  * values must match the value of some ID attribute.
5648  *
5649  * [ VC: Entity Name ]
5650  * Values of type ENTITY must match the Name production, values
5651  * of type ENTITIES must match Names; each Entity Name must match the
5652  * name of an unparsed entity declared in the DTD.
5653  *
5654  * [ VC: Name Token ]
5655  * Values of type NMTOKEN must match the Nmtoken production; values
5656  * of type NMTOKENS must match Nmtokens.
5657  *
5658  * Returns the attribute type
5659  */
5660 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5661 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5662     SHRINK;
5663     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5664 	SKIP(5);
5665 	return(XML_ATTRIBUTE_CDATA);
5666      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5667 	SKIP(6);
5668 	return(XML_ATTRIBUTE_IDREFS);
5669      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5670 	SKIP(5);
5671 	return(XML_ATTRIBUTE_IDREF);
5672      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5673         SKIP(2);
5674 	return(XML_ATTRIBUTE_ID);
5675      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5676 	SKIP(6);
5677 	return(XML_ATTRIBUTE_ENTITY);
5678      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5679 	SKIP(8);
5680 	return(XML_ATTRIBUTE_ENTITIES);
5681      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5682 	SKIP(8);
5683 	return(XML_ATTRIBUTE_NMTOKENS);
5684      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5685 	SKIP(7);
5686 	return(XML_ATTRIBUTE_NMTOKEN);
5687      }
5688      return(xmlParseEnumeratedType(ctxt, tree));
5689 }
5690 
5691 /**
5692  * xmlParseAttributeListDecl:
5693  * @ctxt:  an XML parser context
5694  *
5695  * : parse the Attribute list def for an element
5696  *
5697  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5698  *
5699  * [53] AttDef ::= S Name S AttType S DefaultDecl
5700  *
5701  */
5702 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5703 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5704     const xmlChar *elemName;
5705     const xmlChar *attrName;
5706     xmlEnumerationPtr tree;
5707 
5708     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5709 	xmlParserInputPtr input = ctxt->input;
5710 
5711 	SKIP(9);
5712 	if (!IS_BLANK_CH(CUR)) {
5713 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714 		                 "Space required after '<!ATTLIST'\n");
5715 	}
5716         SKIP_BLANKS;
5717         elemName = xmlParseName(ctxt);
5718 	if (elemName == NULL) {
5719 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720 			   "ATTLIST: no name for Element\n");
5721 	    return;
5722 	}
5723 	SKIP_BLANKS;
5724 	GROW;
5725 	while (RAW != '>') {
5726 	    const xmlChar *check = CUR_PTR;
5727 	    int type;
5728 	    int def;
5729 	    xmlChar *defaultValue = NULL;
5730 
5731 	    GROW;
5732             tree = NULL;
5733 	    attrName = xmlParseName(ctxt);
5734 	    if (attrName == NULL) {
5735 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736 			       "ATTLIST: no name for Attribute\n");
5737 		break;
5738 	    }
5739 	    GROW;
5740 	    if (!IS_BLANK_CH(CUR)) {
5741 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5742 		        "Space required after the attribute name\n");
5743 		break;
5744 	    }
5745 	    SKIP_BLANKS;
5746 
5747 	    type = xmlParseAttributeType(ctxt, &tree);
5748 	    if (type <= 0) {
5749 	        break;
5750 	    }
5751 
5752 	    GROW;
5753 	    if (!IS_BLANK_CH(CUR)) {
5754 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755 			       "Space required after the attribute type\n");
5756 	        if (tree != NULL)
5757 		    xmlFreeEnumeration(tree);
5758 		break;
5759 	    }
5760 	    SKIP_BLANKS;
5761 
5762 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5763 	    if (def <= 0) {
5764                 if (defaultValue != NULL)
5765 		    xmlFree(defaultValue);
5766 	        if (tree != NULL)
5767 		    xmlFreeEnumeration(tree);
5768 	        break;
5769 	    }
5770 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5772 
5773 	    GROW;
5774             if (RAW != '>') {
5775 		if (!IS_BLANK_CH(CUR)) {
5776 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777 			"Space required after the attribute default value\n");
5778 		    if (defaultValue != NULL)
5779 			xmlFree(defaultValue);
5780 		    if (tree != NULL)
5781 			xmlFreeEnumeration(tree);
5782 		    break;
5783 		}
5784 		SKIP_BLANKS;
5785 	    }
5786 	    if (check == CUR_PTR) {
5787 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788 		            "in xmlParseAttributeListDecl\n");
5789 		if (defaultValue != NULL)
5790 		    xmlFree(defaultValue);
5791 	        if (tree != NULL)
5792 		    xmlFreeEnumeration(tree);
5793 		break;
5794 	    }
5795 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796 		(ctxt->sax->attributeDecl != NULL))
5797 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798 	                        type, def, defaultValue, tree);
5799 	    else if (tree != NULL)
5800 		xmlFreeEnumeration(tree);
5801 
5802 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5803 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5804 		(def != XML_ATTRIBUTE_REQUIRED)) {
5805 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5806 	    }
5807 	    if (ctxt->sax2) {
5808 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5809 	    }
5810 	    if (defaultValue != NULL)
5811 	        xmlFree(defaultValue);
5812 	    GROW;
5813 	}
5814 	if (RAW == '>') {
5815 	    if (input != ctxt->input) {
5816 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817     "Attribute list declaration doesn't start and stop in the same entity\n",
5818                                  NULL, NULL);
5819 	    }
5820 	    NEXT;
5821 	}
5822     }
5823 }
5824 
5825 /**
5826  * xmlParseElementMixedContentDecl:
5827  * @ctxt:  an XML parser context
5828  * @inputchk:  the input used for the current entity, needed for boundary checks
5829  *
5830  * parse the declaration for a Mixed Element content
5831  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5832  *
5833  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834  *                '(' S? '#PCDATA' S? ')'
5835  *
5836  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5837  *
5838  * [ VC: No Duplicate Types ]
5839  * The same name must not appear more than once in a single
5840  * mixed-content declaration.
5841  *
5842  * returns: the list of the xmlElementContentPtr describing the element choices
5843  */
5844 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5845 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5846     xmlElementContentPtr ret = NULL, cur = NULL, n;
5847     const xmlChar *elem = NULL;
5848 
5849     GROW;
5850     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5851 	SKIP(7);
5852 	SKIP_BLANKS;
5853 	SHRINK;
5854 	if (RAW == ')') {
5855 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5856 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857 "Element content declaration doesn't start and stop in the same entity\n",
5858                                  NULL, NULL);
5859 	    }
5860 	    NEXT;
5861 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5862 	    if (ret == NULL)
5863 	        return(NULL);
5864 	    if (RAW == '*') {
5865 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
5866 		NEXT;
5867 	    }
5868 	    return(ret);
5869 	}
5870 	if ((RAW == '(') || (RAW == '|')) {
5871 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5872 	    if (ret == NULL) return(NULL);
5873 	}
5874 	while (RAW == '|') {
5875 	    NEXT;
5876 	    if (elem == NULL) {
5877 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5878 		if (ret == NULL) return(NULL);
5879 		ret->c1 = cur;
5880 		if (cur != NULL)
5881 		    cur->parent = ret;
5882 		cur = ret;
5883 	    } else {
5884 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5885 		if (n == NULL) return(NULL);
5886 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5887 		if (n->c1 != NULL)
5888 		    n->c1->parent = n;
5889 	        cur->c2 = n;
5890 		if (n != NULL)
5891 		    n->parent = cur;
5892 		cur = n;
5893 	    }
5894 	    SKIP_BLANKS;
5895 	    elem = xmlParseName(ctxt);
5896 	    if (elem == NULL) {
5897 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5898 			"xmlParseElementMixedContentDecl : Name expected\n");
5899 		xmlFreeDocElementContent(ctxt->myDoc, cur);
5900 		return(NULL);
5901 	    }
5902 	    SKIP_BLANKS;
5903 	    GROW;
5904 	}
5905 	if ((RAW == ')') && (NXT(1) == '*')) {
5906 	    if (elem != NULL) {
5907 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5908 		                               XML_ELEMENT_CONTENT_ELEMENT);
5909 		if (cur->c2 != NULL)
5910 		    cur->c2->parent = cur;
5911             }
5912             if (ret != NULL)
5913                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5914 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5915 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916 "Element content declaration doesn't start and stop in the same entity\n",
5917 				 NULL, NULL);
5918 	    }
5919 	    SKIP(2);
5920 	} else {
5921 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
5922 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5923 	    return(NULL);
5924 	}
5925 
5926     } else {
5927 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5928     }
5929     return(ret);
5930 }
5931 
5932 /**
5933  * xmlParseElementChildrenContentDeclPriv:
5934  * @ctxt:  an XML parser context
5935  * @inputchk:  the input used for the current entity, needed for boundary checks
5936  * @depth: the level of recursion
5937  *
5938  * parse the declaration for a Mixed Element content
5939  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5940  *
5941  *
5942  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5943  *
5944  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5945  *
5946  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5947  *
5948  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5949  *
5950  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951  * TODO Parameter-entity replacement text must be properly nested
5952  *	with parenthesized groups. That is to say, if either of the
5953  *	opening or closing parentheses in a choice, seq, or Mixed
5954  *	construct is contained in the replacement text for a parameter
5955  *	entity, both must be contained in the same replacement text. For
5956  *	interoperability, if a parameter-entity reference appears in a
5957  *	choice, seq, or Mixed construct, its replacement text should not
5958  *	be empty, and neither the first nor last non-blank character of
5959  *	the replacement text should be a connector (| or ,).
5960  *
5961  * Returns the tree of xmlElementContentPtr describing the element
5962  *          hierarchy.
5963  */
5964 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)5965 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5966                                        int depth) {
5967     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5968     const xmlChar *elem;
5969     xmlChar type = 0;
5970 
5971     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5972         (depth >  2048)) {
5973         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5975                           depth);
5976 	return(NULL);
5977     }
5978     SKIP_BLANKS;
5979     GROW;
5980     if (RAW == '(') {
5981 	int inputid = ctxt->input->id;
5982 
5983         /* Recurse on first child */
5984 	NEXT;
5985 	SKIP_BLANKS;
5986         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5987                                                            depth + 1);
5988 	SKIP_BLANKS;
5989 	GROW;
5990     } else {
5991 	elem = xmlParseName(ctxt);
5992 	if (elem == NULL) {
5993 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5994 	    return(NULL);
5995 	}
5996         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5997 	if (cur == NULL) {
5998 	    xmlErrMemory(ctxt, NULL);
5999 	    return(NULL);
6000 	}
6001 	GROW;
6002 	if (RAW == '?') {
6003 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6004 	    NEXT;
6005 	} else if (RAW == '*') {
6006 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6007 	    NEXT;
6008 	} else if (RAW == '+') {
6009 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6010 	    NEXT;
6011 	} else {
6012 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6013 	}
6014 	GROW;
6015     }
6016     SKIP_BLANKS;
6017     SHRINK;
6018     while (RAW != ')') {
6019         /*
6020 	 * Each loop we parse one separator and one element.
6021 	 */
6022         if (RAW == ',') {
6023 	    if (type == 0) type = CUR;
6024 
6025 	    /*
6026 	     * Detect "Name | Name , Name" error
6027 	     */
6028 	    else if (type != CUR) {
6029 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6030 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6031 		                  type);
6032 		if ((last != NULL) && (last != ret))
6033 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6034 		if (ret != NULL)
6035 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6036 		return(NULL);
6037 	    }
6038 	    NEXT;
6039 
6040 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6041 	    if (op == NULL) {
6042 		if ((last != NULL) && (last != ret))
6043 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6044 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6045 		return(NULL);
6046 	    }
6047 	    if (last == NULL) {
6048 		op->c1 = ret;
6049 		if (ret != NULL)
6050 		    ret->parent = op;
6051 		ret = cur = op;
6052 	    } else {
6053 	        cur->c2 = op;
6054 		if (op != NULL)
6055 		    op->parent = cur;
6056 		op->c1 = last;
6057 		if (last != NULL)
6058 		    last->parent = op;
6059 		cur =op;
6060 		last = NULL;
6061 	    }
6062 	} else if (RAW == '|') {
6063 	    if (type == 0) type = CUR;
6064 
6065 	    /*
6066 	     * Detect "Name , Name | Name" error
6067 	     */
6068 	    else if (type != CUR) {
6069 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6071 				  type);
6072 		if ((last != NULL) && (last != ret))
6073 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6074 		if (ret != NULL)
6075 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6076 		return(NULL);
6077 	    }
6078 	    NEXT;
6079 
6080 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6081 	    if (op == NULL) {
6082 		if ((last != NULL) && (last != ret))
6083 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6084 		if (ret != NULL)
6085 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6086 		return(NULL);
6087 	    }
6088 	    if (last == NULL) {
6089 		op->c1 = ret;
6090 		if (ret != NULL)
6091 		    ret->parent = op;
6092 		ret = cur = op;
6093 	    } else {
6094 	        cur->c2 = op;
6095 		if (op != NULL)
6096 		    op->parent = cur;
6097 		op->c1 = last;
6098 		if (last != NULL)
6099 		    last->parent = op;
6100 		cur =op;
6101 		last = NULL;
6102 	    }
6103 	} else {
6104 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6105 	    if ((last != NULL) && (last != ret))
6106 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6107 	    if (ret != NULL)
6108 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6109 	    return(NULL);
6110 	}
6111 	GROW;
6112 	SKIP_BLANKS;
6113 	GROW;
6114 	if (RAW == '(') {
6115 	    int inputid = ctxt->input->id;
6116 	    /* Recurse on second child */
6117 	    NEXT;
6118 	    SKIP_BLANKS;
6119 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6120                                                           depth + 1);
6121 	    SKIP_BLANKS;
6122 	} else {
6123 	    elem = xmlParseName(ctxt);
6124 	    if (elem == NULL) {
6125 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6126 		if (ret != NULL)
6127 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6128 		return(NULL);
6129 	    }
6130 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6131 	    if (last == NULL) {
6132 		if (ret != NULL)
6133 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6134 		return(NULL);
6135 	    }
6136 	    if (RAW == '?') {
6137 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6138 		NEXT;
6139 	    } else if (RAW == '*') {
6140 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6141 		NEXT;
6142 	    } else if (RAW == '+') {
6143 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6144 		NEXT;
6145 	    } else {
6146 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6147 	    }
6148 	}
6149 	SKIP_BLANKS;
6150 	GROW;
6151     }
6152     if ((cur != NULL) && (last != NULL)) {
6153         cur->c2 = last;
6154 	if (last != NULL)
6155 	    last->parent = cur;
6156     }
6157     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6158 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159 "Element content declaration doesn't start and stop in the same entity\n",
6160 			 NULL, NULL);
6161     }
6162     NEXT;
6163     if (RAW == '?') {
6164 	if (ret != NULL) {
6165 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168 	    else
6169 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6170 	}
6171 	NEXT;
6172     } else if (RAW == '*') {
6173 	if (ret != NULL) {
6174 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6175 	    cur = ret;
6176 	    /*
6177 	     * Some normalization:
6178 	     * (a | b* | c?)* == (a | b | c)*
6179 	     */
6180 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6181 		if ((cur->c1 != NULL) &&
6182 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185 		if ((cur->c2 != NULL) &&
6186 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6189 		cur = cur->c2;
6190 	    }
6191 	}
6192 	NEXT;
6193     } else if (RAW == '+') {
6194 	if (ret != NULL) {
6195 	    int found = 0;
6196 
6197 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6200 	    else
6201 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6202 	    /*
6203 	     * Some normalization:
6204 	     * (a | b*)+ == (a | b)*
6205 	     * (a | b?)+ == (a | b)*
6206 	     */
6207 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6208 		if ((cur->c1 != NULL) &&
6209 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 		    found = 1;
6213 		}
6214 		if ((cur->c2 != NULL) &&
6215 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6218 		    found = 1;
6219 		}
6220 		cur = cur->c2;
6221 	    }
6222 	    if (found)
6223 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6224 	}
6225 	NEXT;
6226     }
6227     return(ret);
6228 }
6229 
6230 /**
6231  * xmlParseElementChildrenContentDecl:
6232  * @ctxt:  an XML parser context
6233  * @inputchk:  the input used for the current entity, needed for boundary checks
6234  *
6235  * parse the declaration for a Mixed Element content
6236  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6237  *
6238  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6239  *
6240  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6241  *
6242  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6243  *
6244  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6245  *
6246  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247  * TODO Parameter-entity replacement text must be properly nested
6248  *	with parenthesized groups. That is to say, if either of the
6249  *	opening or closing parentheses in a choice, seq, or Mixed
6250  *	construct is contained in the replacement text for a parameter
6251  *	entity, both must be contained in the same replacement text. For
6252  *	interoperability, if a parameter-entity reference appears in a
6253  *	choice, seq, or Mixed construct, its replacement text should not
6254  *	be empty, and neither the first nor last non-blank character of
6255  *	the replacement text should be a connector (| or ,).
6256  *
6257  * Returns the tree of xmlElementContentPtr describing the element
6258  *          hierarchy.
6259  */
6260 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6261 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262     /* stub left for API/ABI compat */
6263     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6264 }
6265 
6266 /**
6267  * xmlParseElementContentDecl:
6268  * @ctxt:  an XML parser context
6269  * @name:  the name of the element being defined.
6270  * @result:  the Element Content pointer will be stored here if any
6271  *
6272  * parse the declaration for an Element content either Mixed or Children,
6273  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6274  *
6275  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6276  *
6277  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6278  */
6279 
6280 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6281 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6282                            xmlElementContentPtr *result) {
6283 
6284     xmlElementContentPtr tree = NULL;
6285     int inputid = ctxt->input->id;
6286     int res;
6287 
6288     *result = NULL;
6289 
6290     if (RAW != '(') {
6291 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6292 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6293 	return(-1);
6294     }
6295     NEXT;
6296     GROW;
6297     SKIP_BLANKS;
6298     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6299         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6300 	res = XML_ELEMENT_TYPE_MIXED;
6301     } else {
6302         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6303 	res = XML_ELEMENT_TYPE_ELEMENT;
6304     }
6305     SKIP_BLANKS;
6306     *result = tree;
6307     return(res);
6308 }
6309 
6310 /**
6311  * xmlParseElementDecl:
6312  * @ctxt:  an XML parser context
6313  *
6314  * parse an Element declaration.
6315  *
6316  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6317  *
6318  * [ VC: Unique Element Type Declaration ]
6319  * No element type may be declared more than once
6320  *
6321  * Returns the type of the element, or -1 in case of error
6322  */
6323 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6324 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6325     const xmlChar *name;
6326     int ret = -1;
6327     xmlElementContentPtr content  = NULL;
6328 
6329     /* GROW; done in the caller */
6330     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6331 	xmlParserInputPtr input = ctxt->input;
6332 
6333 	SKIP(9);
6334 	if (!IS_BLANK_CH(CUR)) {
6335 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336 		           "Space required after 'ELEMENT'\n");
6337 	}
6338         SKIP_BLANKS;
6339         name = xmlParseName(ctxt);
6340 	if (name == NULL) {
6341 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342 			   "xmlParseElementDecl: no name for Element\n");
6343 	    return(-1);
6344 	}
6345 	while ((RAW == 0) && (ctxt->inputNr > 1))
6346 	    xmlPopInput(ctxt);
6347 	if (!IS_BLANK_CH(CUR)) {
6348 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349 			   "Space required after the element name\n");
6350 	}
6351         SKIP_BLANKS;
6352 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6353 	    SKIP(5);
6354 	    /*
6355 	     * Element must always be empty.
6356 	     */
6357 	    ret = XML_ELEMENT_TYPE_EMPTY;
6358 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6359 	           (NXT(2) == 'Y')) {
6360 	    SKIP(3);
6361 	    /*
6362 	     * Element is a generic container.
6363 	     */
6364 	    ret = XML_ELEMENT_TYPE_ANY;
6365 	} else if (RAW == '(') {
6366 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6367 	} else {
6368 	    /*
6369 	     * [ WFC: PEs in Internal Subset ] error handling.
6370 	     */
6371 	    if ((RAW == '%') && (ctxt->external == 0) &&
6372 	        (ctxt->inputNr == 1)) {
6373 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6374 	  "PEReference: forbidden within markup decl in internal subset\n");
6375 	    } else {
6376 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6377 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6378             }
6379 	    return(-1);
6380 	}
6381 
6382 	SKIP_BLANKS;
6383 	/*
6384 	 * Pop-up of finished entities.
6385 	 */
6386 	while ((RAW == 0) && (ctxt->inputNr > 1))
6387 	    xmlPopInput(ctxt);
6388 	SKIP_BLANKS;
6389 
6390 	if (RAW != '>') {
6391 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6392 	    if (content != NULL) {
6393 		xmlFreeDocElementContent(ctxt->myDoc, content);
6394 	    }
6395 	} else {
6396 	    if (input != ctxt->input) {
6397 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398     "Element declaration doesn't start and stop in the same entity\n");
6399 	    }
6400 
6401 	    NEXT;
6402 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6403 		(ctxt->sax->elementDecl != NULL)) {
6404 		if (content != NULL)
6405 		    content->parent = NULL;
6406 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6407 		                       content);
6408 		if ((content != NULL) && (content->parent == NULL)) {
6409 		    /*
6410 		     * this is a trick: if xmlAddElementDecl is called,
6411 		     * instead of copying the full tree it is plugged directly
6412 		     * if called from the parser. Avoid duplicating the
6413 		     * interfaces or change the API/ABI
6414 		     */
6415 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6416 		}
6417 	    } else if (content != NULL) {
6418 		xmlFreeDocElementContent(ctxt->myDoc, content);
6419 	    }
6420 	}
6421     }
6422     return(ret);
6423 }
6424 
6425 /**
6426  * xmlParseConditionalSections
6427  * @ctxt:  an XML parser context
6428  *
6429  * [61] conditionalSect ::= includeSect | ignoreSect
6430  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6434  */
6435 
6436 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6437 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6438     int id = ctxt->input->id;
6439 
6440     SKIP(3);
6441     SKIP_BLANKS;
6442     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6443 	SKIP(7);
6444 	SKIP_BLANKS;
6445 	if (RAW != '[') {
6446 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6447 	} else {
6448 	    if (ctxt->input->id != id) {
6449 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450 	    "All markup of the conditional section is not in the same entity\n",
6451 				     NULL, NULL);
6452 	    }
6453 	    NEXT;
6454 	}
6455 	if (xmlParserDebugEntities) {
6456 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6457 		xmlGenericError(xmlGenericErrorContext,
6458 			"%s(%d): ", ctxt->input->filename,
6459 			ctxt->input->line);
6460 	    xmlGenericError(xmlGenericErrorContext,
6461 		    "Entering INCLUDE Conditional Section\n");
6462 	}
6463 
6464 	while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6465 	       (NXT(2) != '>'))) {
6466 	    const xmlChar *check = CUR_PTR;
6467 	    unsigned int cons = ctxt->input->consumed;
6468 
6469 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470 		xmlParseConditionalSections(ctxt);
6471 	    } else if (IS_BLANK_CH(CUR)) {
6472 		NEXT;
6473 	    } else if (RAW == '%') {
6474 		xmlParsePEReference(ctxt);
6475 	    } else
6476 		xmlParseMarkupDecl(ctxt);
6477 
6478 	    /*
6479 	     * Pop-up of finished entities.
6480 	     */
6481 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6482 		xmlPopInput(ctxt);
6483 
6484 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6485 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6486 		break;
6487 	    }
6488 	}
6489 	if (xmlParserDebugEntities) {
6490 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6491 		xmlGenericError(xmlGenericErrorContext,
6492 			"%s(%d): ", ctxt->input->filename,
6493 			ctxt->input->line);
6494 	    xmlGenericError(xmlGenericErrorContext,
6495 		    "Leaving INCLUDE Conditional Section\n");
6496 	}
6497 
6498     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6499 	int state;
6500 	xmlParserInputState instate;
6501 	int depth = 0;
6502 
6503 	SKIP(6);
6504 	SKIP_BLANKS;
6505 	if (RAW != '[') {
6506 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6507 	} else {
6508 	    if (ctxt->input->id != id) {
6509 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510 	    "All markup of the conditional section is not in the same entity\n",
6511 				     NULL, NULL);
6512 	    }
6513 	    NEXT;
6514 	}
6515 	if (xmlParserDebugEntities) {
6516 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6517 		xmlGenericError(xmlGenericErrorContext,
6518 			"%s(%d): ", ctxt->input->filename,
6519 			ctxt->input->line);
6520 	    xmlGenericError(xmlGenericErrorContext,
6521 		    "Entering IGNORE Conditional Section\n");
6522 	}
6523 
6524 	/*
6525 	 * Parse up to the end of the conditional section
6526 	 * But disable SAX event generating DTD building in the meantime
6527 	 */
6528 	state = ctxt->disableSAX;
6529 	instate = ctxt->instate;
6530 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6531 	ctxt->instate = XML_PARSER_IGNORE;
6532 
6533 	while ((depth >= 0) && (RAW != 0)) {
6534 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535 	    depth++;
6536 	    SKIP(3);
6537 	    continue;
6538 	  }
6539 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540 	    if (--depth >= 0) SKIP(3);
6541 	    continue;
6542 	  }
6543 	  NEXT;
6544 	  continue;
6545 	}
6546 
6547 	ctxt->disableSAX = state;
6548 	ctxt->instate = instate;
6549 
6550 	if (xmlParserDebugEntities) {
6551 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6552 		xmlGenericError(xmlGenericErrorContext,
6553 			"%s(%d): ", ctxt->input->filename,
6554 			ctxt->input->line);
6555 	    xmlGenericError(xmlGenericErrorContext,
6556 		    "Leaving IGNORE Conditional Section\n");
6557 	}
6558 
6559     } else {
6560 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6561     }
6562 
6563     if (RAW == 0)
6564         SHRINK;
6565 
6566     if (RAW == 0) {
6567 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6568     } else {
6569 	if (ctxt->input->id != id) {
6570 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571 	"All markup of the conditional section is not in the same entity\n",
6572 				 NULL, NULL);
6573 	}
6574         SKIP(3);
6575     }
6576 }
6577 
6578 /**
6579  * xmlParseMarkupDecl:
6580  * @ctxt:  an XML parser context
6581  *
6582  * parse Markup declarations
6583  *
6584  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585  *                     NotationDecl | PI | Comment
6586  *
6587  * [ VC: Proper Declaration/PE Nesting ]
6588  * Parameter-entity replacement text must be properly nested with
6589  * markup declarations. That is to say, if either the first character
6590  * or the last character of a markup declaration (markupdecl above) is
6591  * contained in the replacement text for a parameter-entity reference,
6592  * both must be contained in the same replacement text.
6593  *
6594  * [ WFC: PEs in Internal Subset ]
6595  * In the internal DTD subset, parameter-entity references can occur
6596  * only where markup declarations can occur, not within markup declarations.
6597  * (This does not apply to references that occur in external parameter
6598  * entities or to the external subset.)
6599  */
6600 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6601 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6602     GROW;
6603     if (CUR == '<') {
6604         if (NXT(1) == '!') {
6605 	    switch (NXT(2)) {
6606 	        case 'E':
6607 		    if (NXT(3) == 'L')
6608 			xmlParseElementDecl(ctxt);
6609 		    else if (NXT(3) == 'N')
6610 			xmlParseEntityDecl(ctxt);
6611 		    break;
6612 	        case 'A':
6613 		    xmlParseAttributeListDecl(ctxt);
6614 		    break;
6615 	        case 'N':
6616 		    xmlParseNotationDecl(ctxt);
6617 		    break;
6618 	        case '-':
6619 		    xmlParseComment(ctxt);
6620 		    break;
6621 		default:
6622 		    /* there is an error but it will be detected later */
6623 		    break;
6624 	    }
6625 	} else if (NXT(1) == '?') {
6626 	    xmlParsePI(ctxt);
6627 	}
6628     }
6629     /*
6630      * This is only for internal subset. On external entities,
6631      * the replacement is done before parsing stage
6632      */
6633     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634 	xmlParsePEReference(ctxt);
6635 
6636     /*
6637      * Conditional sections are allowed from entities included
6638      * by PE References in the internal subset.
6639      */
6640     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642 	    xmlParseConditionalSections(ctxt);
6643 	}
6644     }
6645 
6646     ctxt->instate = XML_PARSER_DTD;
6647 }
6648 
6649 /**
6650  * xmlParseTextDecl:
6651  * @ctxt:  an XML parser context
6652  *
6653  * parse an XML declaration header for external entities
6654  *
6655  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6656  */
6657 
6658 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6659 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6660     xmlChar *version;
6661     const xmlChar *encoding;
6662 
6663     /*
6664      * We know that '<?xml' is here.
6665      */
6666     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6667 	SKIP(5);
6668     } else {
6669 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6670 	return;
6671     }
6672 
6673     if (!IS_BLANK_CH(CUR)) {
6674 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675 		       "Space needed after '<?xml'\n");
6676     }
6677     SKIP_BLANKS;
6678 
6679     /*
6680      * We may have the VersionInfo here.
6681      */
6682     version = xmlParseVersionInfo(ctxt);
6683     if (version == NULL)
6684 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6685     else {
6686 	if (!IS_BLANK_CH(CUR)) {
6687 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 		           "Space needed here\n");
6689 	}
6690     }
6691     ctxt->input->version = version;
6692 
6693     /*
6694      * We must have the encoding declaration
6695      */
6696     encoding = xmlParseEncodingDecl(ctxt);
6697     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6698 	/*
6699 	 * The XML REC instructs us to stop parsing right here
6700 	 */
6701         return;
6702     }
6703     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705 		       "Missing encoding in text declaration\n");
6706     }
6707 
6708     SKIP_BLANKS;
6709     if ((RAW == '?') && (NXT(1) == '>')) {
6710         SKIP(2);
6711     } else if (RAW == '>') {
6712         /* Deprecated old WD ... */
6713 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6714 	NEXT;
6715     } else {
6716 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6717 	MOVETO_ENDTAG(CUR_PTR);
6718 	NEXT;
6719     }
6720 }
6721 
6722 /**
6723  * xmlParseExternalSubset:
6724  * @ctxt:  an XML parser context
6725  * @ExternalID: the external identifier
6726  * @SystemID: the system identifier (or URL)
6727  *
6728  * parse Markup declarations from an external subset
6729  *
6730  * [30] extSubset ::= textDecl? extSubsetDecl
6731  *
6732  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6733  */
6734 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6735 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736                        const xmlChar *SystemID) {
6737     xmlDetectSAX2(ctxt);
6738     GROW;
6739 
6740     if ((ctxt->encoding == NULL) &&
6741         (ctxt->input->end - ctxt->input->cur >= 4)) {
6742         xmlChar start[4];
6743 	xmlCharEncoding enc;
6744 
6745 	start[0] = RAW;
6746 	start[1] = NXT(1);
6747 	start[2] = NXT(2);
6748 	start[3] = NXT(3);
6749 	enc = xmlDetectCharEncoding(start, 4);
6750 	if (enc != XML_CHAR_ENCODING_NONE)
6751 	    xmlSwitchEncoding(ctxt, enc);
6752     }
6753 
6754     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6755 	xmlParseTextDecl(ctxt);
6756 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6757 	    /*
6758 	     * The XML REC instructs us to stop parsing right here
6759 	     */
6760 	    ctxt->instate = XML_PARSER_EOF;
6761 	    return;
6762 	}
6763     }
6764     if (ctxt->myDoc == NULL) {
6765         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6766 	if (ctxt->myDoc == NULL) {
6767 	    xmlErrMemory(ctxt, "New Doc failed");
6768 	    return;
6769 	}
6770 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6771     }
6772     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6774 
6775     ctxt->instate = XML_PARSER_DTD;
6776     ctxt->external = 1;
6777     while (((RAW == '<') && (NXT(1) == '?')) ||
6778            ((RAW == '<') && (NXT(1) == '!')) ||
6779 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
6780 	const xmlChar *check = CUR_PTR;
6781 	unsigned int cons = ctxt->input->consumed;
6782 
6783 	GROW;
6784         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785 	    xmlParseConditionalSections(ctxt);
6786 	} else if (IS_BLANK_CH(CUR)) {
6787 	    NEXT;
6788 	} else if (RAW == '%') {
6789             xmlParsePEReference(ctxt);
6790 	} else
6791 	    xmlParseMarkupDecl(ctxt);
6792 
6793 	/*
6794 	 * Pop-up of finished entities.
6795 	 */
6796 	while ((RAW == 0) && (ctxt->inputNr > 1))
6797 	    xmlPopInput(ctxt);
6798 
6799 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6800 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6801 	    break;
6802 	}
6803     }
6804 
6805     if (RAW != 0) {
6806 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6807     }
6808 
6809 }
6810 
6811 /**
6812  * xmlParseReference:
6813  * @ctxt:  an XML parser context
6814  *
6815  * parse and handle entity references in content, depending on the SAX
6816  * interface, this may end-up in a call to character() if this is a
6817  * CharRef, a predefined entity, if there is no reference() callback.
6818  * or if the parser was asked to switch to that mode.
6819  *
      * Processing order:
      *  - nothing is done unless the current character is '&';
      *  - a CharRef ('&#...') is parsed with xmlParseCharRef() and handed to
      *    the characters() callback, or to reference() when the context
      *    charset is not UTF-8 and the value is above 0xFF;
      *  - otherwise the entity is resolved with xmlParseEntityRef(); the
      *    function returns if no entity is found or if the document is no
      *    longer well formed;
      *  - the content of a predefined entity is sent to characters();
      *  - on the first reference (ent->checked == 0) the entity content is
      *    parsed and the resulting node list, if any, is attached to the
      *    entity;
      *  - finally the content is provided to the application: through the
      *    reference() callback when entities are not substituted, or by
      *    linking the entity nodes (or copies of them) under ctxt->node.
      *
      * SAX callbacks are only invoked when ctxt->sax and the callback are
      * non-NULL and ctxt->disableSAX is not set.
      *
6820  * [67] Reference ::= EntityRef | CharRef
6821  */
6822 void
xmlParseReference(xmlParserCtxtPtr ctxt)6823 xmlParseReference(xmlParserCtxtPtr ctxt) {
6824     xmlEntityPtr ent;
6825     xmlChar *val;
         /* value of ent->checked as it was before this call touched it */
6826     int was_checked;
         /* node list produced by the first parse of the entity, if any */
6827     xmlNodePtr list = NULL;
6828     xmlParserErrors ret = XML_ERR_OK;
6829 
6830 
6831     if (RAW != '&')
6832         return;
6833 
6834     /*
6835      * Simple case of a CharRef
6836      */
6837     if (NXT(1) == '#') {
6838 	int i = 0;
6839 	xmlChar out[10];
     	/* read before xmlParseCharRef() runs, used to rebuild "#x.." vs "#.." */
6840 	int hex = NXT(2);
6841 	int value = xmlParseCharRef(ctxt);
6842 
     	/* a zero value means there is nothing to deliver */
6843 	if (value == 0)
6844 	    return;
6845 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6846 	    /*
6847 	     * So we are using non-UTF-8 buffers
6848 	     * Check that the char fit on 8bits, if not
6849 	     * generate a CharRef.
6850 	     */
6851 	    if (value <= 0xFF) {
6852 		out[0] = value;
6853 		out[1] = 0;
6854 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6855 		    (!ctxt->disableSAX))
6856 		    ctxt->sax->characters(ctxt->userData, out, 1);
6857 	    } else {
     		/* rebuild the reference name, bounded by the size of out */
6858 		if ((hex == 'x') || (hex == 'X'))
6859 		    snprintf((char *)out, sizeof(out), "#x%X", value);
6860 		else
6861 		    snprintf((char *)out, sizeof(out), "#%d", value);
6862 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863 		    (!ctxt->disableSAX))
6864 		    ctxt->sax->reference(ctxt->userData, out);
6865 	    }
6866 	} else {
6867 	    /*
6868 	     * Just encode the value in UTF-8
6869 	     */
6870 	    COPY_BUF(0 ,out, i, value);
6871 	    out[i] = 0;
6872 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873 		(!ctxt->disableSAX))
6874 		ctxt->sax->characters(ctxt->userData, out, i);
6875 	}
6876 	return;
6877     }
6878 
6879     /*
6880      * We are seeing an entity reference
6881      */
6882     ent = xmlParseEntityRef(ctxt);
6883     if (ent == NULL) return;
6884     if (!ctxt->wellFormed)
6885 	return;
6886     was_checked = ent->checked;
6887 
6888     /* special case of predefined entities */
6889     if ((ent->name == NULL) ||
6890         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6891 	val = ent->content;
6892 	if (val == NULL) return;
6893 	/*
6894 	 * inline the entity.
6895 	 */
6896 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897 	    (!ctxt->disableSAX))
6898 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6899 	return;
6900     }
6901 
6902     /*
6903      * The first reference to the entity trigger a parsing phase
6904      * where the ent->children is filled with the result from
6905      * the parsing.
6906      */
6907     if (ent->checked == 0) {
     	/* reference counter before the parse, to measure what it adds */
6908 	unsigned long oldnbent = ctxt->nbentities;
6909 
6910 	/*
6911 	 * This is a bit hackish but this seems the best
6912 	 * way to make sure both SAX and DOM entity support
6913 	 * behaves okay.
6914 	 */
6915 	void *user_data;
6916 	if (ctxt->userData == ctxt)
6917 	    user_data = NULL;
6918 	else
6919 	    user_data = ctxt->userData;
6920 
6921 	/*
6922 	 * Check that this entity is well formed
6923 	 * 4.3.2: An internal general parsed entity is well-formed
6924 	 * if its replacement text matches the production labeled
6925 	 * content.
6926 	 */
6927 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6928 	    ctxt->depth++;
6929 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6930 	                                              user_data, &list);
6931 	    ctxt->depth--;
6932 
6933 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6934 	    ctxt->depth++;
6935 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6936 	                                   user_data, ctxt->depth, ent->URI,
6937 					   ent->ExternalID, &list);
6938 	    ctxt->depth--;
6939 	} else {
6940 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
6941 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6942 			 "invalid entity type found\n", NULL);
6943 	}
6944 
6945 	/*
6946 	 * Store the number of entities needing parsing for this entity
6947 	 * content and do checkings
6948 	 */
6949 	ent->checked = ctxt->nbentities - oldnbent;
6950 	if (ret == XML_ERR_ENTITY_LOOP) {
6951 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6952 	    xmlFreeNodeList(list);
6953 	    return;
6954 	}
6955 	if (xmlParserEntityCheck(ctxt, 0, ent)) {
6956 	    xmlFreeNodeList(list);
6957 	    return;
6958 	}
6959 
     	/*
     	 * A successful parse hands the node list to the entity, unless the
     	 * entity already has children, in which case the list is freed.
     	 */
6960 	if ((ret == XML_ERR_OK) && (list != NULL)) {
6961 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6962 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6963 		(ent->children == NULL)) {
6964 		ent->children = list;
6965 		if (ctxt->replaceEntities) {
6966 		    /*
6967 		     * Prune it directly in the generated document
6968 		     * except for single text nodes.
6969 		     */
6970 		    if (((list->type == XML_TEXT_NODE) &&
6971 			 (list->next == NULL)) ||
6972 			(ctxt->parseMode == XML_PARSE_READER)) {
     			/* the entity keeps the nodes; later code will copy them */
6973 			list->parent = (xmlNodePtr) ent;
6974 			list = NULL;
6975 			ent->owner = 1;
6976 		    } else {
     			/*
     			 * the nodes are parented to ctxt->node right away;
     			 * list is left non-NULL so that the code below links
     			 * them into the tree instead of copying them
     			 */
6977 			ent->owner = 0;
6978 			while (list != NULL) {
6979 			    list->parent = (xmlNodePtr) ctxt->node;
6980 			    list->doc = ctxt->myDoc;
6981 			    if (list->next == NULL)
6982 				ent->last = list;
6983 			    list = list->next;
6984 			}
6985 			list = ent->children;
6986 #ifdef LIBXML_LEGACY_ENABLED
6987 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6988 			  xmlAddEntityReference(ent, list, NULL);
6989 #endif /* LIBXML_LEGACY_ENABLED */
6990 		    }
6991 		} else {
     		    /* no substitution: the entity owns the whole list */
6992 		    ent->owner = 1;
6993 		    while (list != NULL) {
6994 			list->parent = (xmlNodePtr) ent;
6995 			xmlSetTreeDoc(list, ent->doc);
6996 			if (list->next == NULL)
6997 			    ent->last = list;
6998 			list = list->next;
6999 		    }
7000 		}
7001 	    } else {
7002 		xmlFreeNodeList(list);
7003 		list = NULL;
7004 	    }
7005 	} else if ((ret != XML_ERR_OK) &&
7006 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7007 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7008 		     "Entity '%s' failed to parse\n", ent->name);
7009 	} else if (list != NULL) {
7010 	    xmlFreeNodeList(list);
7011 	    list = NULL;
7012 	}
     	/* make sure the entity is not seen as unchecked next time */
7013 	if (ent->checked == 0)
7014 	    ent->checked = 1;
7015     } else if (ent->checked != 1) {
     	/* already checked: add back the count recorded on the first parse */
7016 	ctxt->nbentities += ent->checked;
7017     }
7018 
7019     /*
7020      * Now that the entity content has been gathered
7021      * provide it to the application, this can take different forms based
7022      * on the parsing modes.
7023      */
7024     if (ent->children == NULL) {
7025 	/*
7026 	 * Probably running in SAX mode and the callbacks don't
7027 	 * build the entity content. So unless we already went
7028 	 * though parsing for first checking go though the entity
7029 	 * content to generate callbacks associated to the entity
7030 	 */
7031 	if (was_checked != 0) {
7032 	    void *user_data;
7033 	    /*
7034 	     * This is a bit hackish but this seems the best
7035 	     * way to make sure both SAX and DOM entity support
7036 	     * behaves okay.
7037 	     */
7038 	    if (ctxt->userData == ctxt)
7039 		user_data = NULL;
7040 	    else
7041 		user_data = ctxt->userData;
7042 
     	    /* same dispatch as the first parse, but no node list is requested */
7043 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7044 		ctxt->depth++;
7045 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7046 				   ent->content, user_data, NULL);
7047 		ctxt->depth--;
7048 	    } else if (ent->etype ==
7049 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7050 		ctxt->depth++;
7051 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7052 			   ctxt->sax, user_data, ctxt->depth,
7053 			   ent->URI, ent->ExternalID, NULL);
7054 		ctxt->depth--;
7055 	    } else {
7056 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7057 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7058 			     "invalid entity type found\n", NULL);
7059 	    }
7060 	    if (ret == XML_ERR_ENTITY_LOOP) {
7061 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7062 		return;
7063 	    }
7064 	}
7065 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7066 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7067 	    /*
7068 	     * Entity reference callback comes second, it's somewhat
7069 	     * superfluous but a compatibility to historical behaviour
7070 	     */
7071 	    ctxt->sax->reference(ctxt->userData, ent->name);
7072 	}
7073 	return;
7074     }
7075 
7076     /*
7077      * The entity has children at this point (the NULL case returned
      * above). When entities are not being substituted, only report the
      * reference through SAX and stop here.
7078      */
7079     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7080 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7081 	/*
7082 	 * Create a node.
7083 	 */
7084 	ctxt->sax->reference(ctxt->userData, ent->name);
7085 	return;
7086     }
7087 
         /*
          * NOTE(review): ent->children cannot be NULL here since that case
          * returned earlier, so this test reduces to ctxt->replaceEntities.
          */
7088     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7089 	/*
7090 	 * There is a problem on the handling of _private for entities
7091 	 * (bug 155816): Should we copy the content of the field from
7092 	 * the entity (possibly overwriting some value set by the user
7093 	 * when a copy is created), should we leave it alone, or should
7094 	 * we try to take care of different situations?  The problem
7095 	 * is exacerbated by the usage of this field by the xmlReader.
7096 	 * To fix this bug, we look at _private on the created node
7097 	 * and, if it's NULL, we copy in whatever was in the entity.
7098 	 * If it's not NULL we leave it alone.  This is somewhat of a
7099 	 * hack - maybe we should have further tests to determine
7100 	 * what to do.
7101 	 */
7102 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7103 	    /*
7104 	     * Seems we are generating the DOM content, do
7105 	     * a simple tree copy for all references except the first
7106 	     * In the first occurrence list contains the replacement.
7107 	     * parseMode == XML_PARSE_READER means we are operating on the
7108 	     * Reader and since nodes are discarded we must copy all the time.
7109 	     */
7110 	    if (((list == NULL) && (ent->owner == 0)) ||
7111 		(ctxt->parseMode == XML_PARSE_READER)) {
7112 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7113 
7114 		/*
7115 		 * when operating on a reader, the entities definitions
7116 		 * are always owning the entities subtree.
7117 		if (ctxt->parseMode == XML_PARSE_READER)
7118 		    ent->owner = 1;
7119 		 */
7120 
     		/* add a copy of each entity child under ctxt->node */
7121 		cur = ent->children;
7122 		while (cur != NULL) {
7123 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7124 		    if (nw != NULL) {
7125 			if (nw->_private == NULL)
7126 			    nw->_private = cur->_private;
7127 			if (firstChild == NULL){
7128 			    firstChild = nw;
7129 			}
7130 			nw = xmlAddChild(ctxt->node, nw);
7131 		    }
7132 		    if (cur == ent->last) {
7133 			/*
7134 			 * needed to detect some strange empty
7135 			 * node cases in the reader tests
7136 			 */
7137 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7138 			    (nw != NULL) &&
7139 			    (nw->type == XML_ELEMENT_NODE) &&
7140 			    (nw->children == NULL))
7141 			    nw->extra = 1;
7142 
7143 			break;
7144 		    }
7145 		    cur = cur->next;
7146 		}
7147 #ifdef LIBXML_LEGACY_ENABLED
7148 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7149 		  xmlAddEntityReference(ent, firstChild, nw);
7150 #endif /* LIBXML_LEGACY_ENABLED */
7151 	    } else if (list == NULL) {
7152 		xmlNodePtr nw = NULL, cur, next, last,
7153 			   firstChild = NULL;
7154 		/*
7155 		 * Copy the entity child list and make it the new
7156 		 * entity child list. The goal is to make sure any
7157 		 * ID or REF referenced will be the one from the
7158 		 * document content and not the entity copy.
7159 		 */
     		/* detach the current list from the entity before walking it */
7160 		cur = ent->children;
7161 		ent->children = NULL;
7162 		last = ent->last;
7163 		ent->last = NULL;
7164 		while (cur != NULL) {
7165 		    next = cur->next;
7166 		    cur->next = NULL;
7167 		    cur->parent = NULL;
7168 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7169 		    if (nw != NULL) {
7170 			if (nw->_private == NULL)
7171 			    nw->_private = cur->_private;
7172 			if (firstChild == NULL){
7173 			    firstChild = cur;
7174 			}
     			/*
     			 * the copy (nw) goes to the entity while the
     			 * original node (cur) goes under ctxt->node
     			 */
7175 			xmlAddChild((xmlNodePtr) ent, nw);
7176 			xmlAddChild(ctxt->node, cur);
7177 		    }
7178 		    if (cur == last)
7179 			break;
7180 		    cur = next;
7181 		}
7182 		if (ent->owner == 0)
7183 		    ent->owner = 1;
7184 #ifdef LIBXML_LEGACY_ENABLED
7185 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7186 		  xmlAddEntityReference(ent, firstChild, nw);
7187 #endif /* LIBXML_LEGACY_ENABLED */
7188 	    } else {
7189 		const xmlChar *nbktext;
7190 
7191 		/*
7192 		 * the name change is to avoid coalescing of the
7193 		 * node with a possible previous text one which
7194 		 * would make ent->children a dangling pointer
7195 		 */
7196 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7197 					-1);
7198 		if (ent->children->type == XML_TEXT_NODE)
7199 		    ent->children->name = nbktext;
7200 		if ((ent->last != ent->children) &&
7201 		    (ent->last->type == XML_TEXT_NODE))
7202 		    ent->last->name = nbktext;
7203 		xmlAddChildList(ctxt->node, ent->children);
7204 	    }
7205 
7206 	    /*
7207 	     * This is to avoid a nasty side effect, see
7208 	     * characters() in SAX.c
7209 	     */
7210 	    ctxt->nodemem = 0;
7211 	    ctxt->nodelen = 0;
7212 	    return;
7213 	}
7214     }
7215 }
7216 
7217 /**
7218  * xmlParseEntityRef:
7219  * @ctxt:  an XML parser context
7220  *
7221  * parse ENTITY references declarations
7222  *
7223  * [68] EntityRef ::= '&' Name ';'
7224  *
7225  * [ WFC: Entity Declared ]
7226  * In a document without any DTD, a document with only an internal DTD
7227  * subset which contains no parameter entity references, or a document
7228  * with "standalone='yes'", the Name given in the entity reference
7229  * must match that in an entity declaration, except that well-formed
7230  * documents need not declare any of the following entities: amp, lt,
7231  * gt, apos, quot.  The declaration of a parameter entity must precede
7232  * any reference to it.  Similarly, the declaration of a general entity
7233  * must precede any reference to it which appears in a default value in an
7234  * attribute-list declaration. Note that if entities are declared in the
7235  * external subset or in external parameter entities, a non-validating
7236  * processor is not obligated to read and process their declarations;
7237  * for such documents, the rule that an entity must be declared is a
7238  * well-formedness constraint only if standalone='yes'.
7239  *
7240  * [ WFC: Parsed Entity ]
7241  * An entity reference must not contain the name of an unparsed entity
7242  *
7243  * Returns the xmlEntityPtr if found, or NULL otherwise.
7244  */
7245 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7246 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7247     const xmlChar *name;
7248     xmlEntityPtr ent = NULL;
7249 
7250     GROW;
7251 
7252     if (RAW != '&')
7253         return(NULL);
7254     NEXT;
7255     name = xmlParseName(ctxt);
7256     if (name == NULL) {
7257 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7258 		       "xmlParseEntityRef: no name\n");
7259         return(NULL);
7260     }
7261     if (RAW != ';') {
7262 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7263 	return(NULL);
7264     }
7265     NEXT;
7266 
7267     /*
7268      * Predefined entites override any extra definition
7269      */
7270     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7271         ent = xmlGetPredefinedEntity(name);
7272         if (ent != NULL)
7273             return(ent);
7274     }
7275 
7276     /*
7277      * Increate the number of entity references parsed
7278      */
7279     ctxt->nbentities++;
7280 
7281     /*
7282      * Ask first SAX for entity resolution, otherwise try the
7283      * entities which may have stored in the parser context.
7284      */
7285     if (ctxt->sax != NULL) {
7286 	if (ctxt->sax->getEntity != NULL)
7287 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7288 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7289 	    (ctxt->options & XML_PARSE_OLDSAX))
7290 	    ent = xmlGetPredefinedEntity(name);
7291 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7292 	    (ctxt->userData==ctxt)) {
7293 	    ent = xmlSAX2GetEntity(ctxt, name);
7294 	}
7295     }
7296     /*
7297      * [ WFC: Entity Declared ]
7298      * In a document without any DTD, a document with only an
7299      * internal DTD subset which contains no parameter entity
7300      * references, or a document with "standalone='yes'", the
7301      * Name given in the entity reference must match that in an
7302      * entity declaration, except that well-formed documents
7303      * need not declare any of the following entities: amp, lt,
7304      * gt, apos, quot.
7305      * The declaration of a parameter entity must precede any
7306      * reference to it.
7307      * Similarly, the declaration of a general entity must
7308      * precede any reference to it which appears in a default
7309      * value in an attribute-list declaration. Note that if
7310      * entities are declared in the external subset or in
7311      * external parameter entities, a non-validating processor
7312      * is not obligated to read and process their declarations;
7313      * for such documents, the rule that an entity must be
7314      * declared is a well-formedness constraint only if
7315      * standalone='yes'.
7316      */
7317     if (ent == NULL) {
7318 	if ((ctxt->standalone == 1) ||
7319 	    ((ctxt->hasExternalSubset == 0) &&
7320 	     (ctxt->hasPErefs == 0))) {
7321 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7322 		     "Entity '%s' not defined\n", name);
7323 	} else {
7324 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7325 		     "Entity '%s' not defined\n", name);
7326 	    if ((ctxt->inSubset == 0) &&
7327 		(ctxt->sax != NULL) &&
7328 		(ctxt->sax->reference != NULL)) {
7329 		ctxt->sax->reference(ctxt->userData, name);
7330 	    }
7331 	}
7332 	ctxt->valid = 0;
7333     }
7334 
7335     /*
7336      * [ WFC: Parsed Entity ]
7337      * An entity reference must not contain the name of an
7338      * unparsed entity
7339      */
7340     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7341 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7342 		 "Entity reference to unparsed entity %s\n", name);
7343     }
7344 
7345     /*
7346      * [ WFC: No External Entity References ]
7347      * Attribute values cannot contain direct or indirect
7348      * entity references to external entities.
7349      */
7350     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7351 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7352 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7353 	     "Attribute references external entity '%s'\n", name);
7354     }
7355     /*
7356      * [ WFC: No < in Attribute Values ]
7357      * The replacement text of any entity referred to directly or
7358      * indirectly in an attribute value (other than "&lt;") must
7359      * not contain a <.
7360      */
7361     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7362 	     (ent != NULL) && (ent->content != NULL) &&
7363 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7364 	     (xmlStrchr(ent->content, '<'))) {
7365 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7366     "'<' in entity '%s' is not allowed in attributes values\n", name);
7367     }
7368 
7369     /*
7370      * Internal check, no parameter entities here ...
7371      */
7372     else {
7373 	switch (ent->etype) {
7374 	    case XML_INTERNAL_PARAMETER_ENTITY:
7375 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7376 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7377 	     "Attempt to reference the parameter entity '%s'\n",
7378 			      name);
7379 	    break;
7380 	    default:
7381 	    break;
7382 	}
7383     }
7384 
7385     /*
7386      * [ WFC: No Recursion ]
7387      * A parsed entity must not contain a recursive reference
7388      * to itself, either directly or indirectly.
7389      * Done somewhere else
7390      */
7391     return(ent);
7392 }
7393 
7394 /**
7395  * xmlParseStringEntityRef:
7396  * @ctxt:  an XML parser context
7397  * @str:  a pointer to an index in the string
7398  *
7399  * parse ENTITY references declarations, but this version parses it from
7400  * a string value.
7401  *
7402  * [68] EntityRef ::= '&' Name ';'
7403  *
7404  * [ WFC: Entity Declared ]
7405  * In a document without any DTD, a document with only an internal DTD
7406  * subset which contains no parameter entity references, or a document
7407  * with "standalone='yes'", the Name given in the entity reference
7408  * must match that in an entity declaration, except that well-formed
7409  * documents need not declare any of the following entities: amp, lt,
7410  * gt, apos, quot.  The declaration of a parameter entity must precede
7411  * any reference to it.  Similarly, the declaration of a general entity
7412  * must precede any reference to it which appears in a default value in an
7413  * attribute-list declaration. Note that if entities are declared in the
7414  * external subset or in external parameter entities, a non-validating
7415  * processor is not obligated to read and process their declarations;
7416  * for such documents, the rule that an entity must be declared is a
7417  * well-formedness constraint only if standalone='yes'.
7418  *
7419  * [ WFC: Parsed Entity ]
7420  * An entity reference must not contain the name of an unparsed entity
7421  *
7422  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7423  * is updated to the current location in the string.
7424  */
7425 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7426 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7427     xmlChar *name;
7428     const xmlChar *ptr;
7429     xmlChar cur;
7430     xmlEntityPtr ent = NULL;
7431 
7432     if ((str == NULL) || (*str == NULL))
7433         return(NULL);
7434     ptr = *str;
7435     cur = *ptr;
7436     if (cur != '&')
7437 	return(NULL);
7438 
7439     ptr++;
7440     name = xmlParseStringName(ctxt, &ptr);
7441     if (name == NULL) {
7442 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7443 		       "xmlParseStringEntityRef: no name\n");
7444 	*str = ptr;
7445 	return(NULL);
7446     }
7447     if (*ptr != ';') {
7448 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7449         xmlFree(name);
7450 	*str = ptr;
7451 	return(NULL);
7452     }
7453     ptr++;
7454 
7455 
7456     /*
7457      * Predefined entites override any extra definition
7458      */
7459     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7460         ent = xmlGetPredefinedEntity(name);
7461         if (ent != NULL) {
7462             xmlFree(name);
7463             *str = ptr;
7464             return(ent);
7465         }
7466     }
7467 
7468     /*
7469      * Increate the number of entity references parsed
7470      */
7471     ctxt->nbentities++;
7472 
7473     /*
7474      * Ask first SAX for entity resolution, otherwise try the
7475      * entities which may have stored in the parser context.
7476      */
7477     if (ctxt->sax != NULL) {
7478 	if (ctxt->sax->getEntity != NULL)
7479 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7480 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7481 	    ent = xmlGetPredefinedEntity(name);
7482 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7483 	    ent = xmlSAX2GetEntity(ctxt, name);
7484 	}
7485     }
7486 
7487     /*
7488      * [ WFC: Entity Declared ]
7489      * In a document without any DTD, a document with only an
7490      * internal DTD subset which contains no parameter entity
7491      * references, or a document with "standalone='yes'", the
7492      * Name given in the entity reference must match that in an
7493      * entity declaration, except that well-formed documents
7494      * need not declare any of the following entities: amp, lt,
7495      * gt, apos, quot.
7496      * The declaration of a parameter entity must precede any
7497      * reference to it.
7498      * Similarly, the declaration of a general entity must
7499      * precede any reference to it which appears in a default
7500      * value in an attribute-list declaration. Note that if
7501      * entities are declared in the external subset or in
7502      * external parameter entities, a non-validating processor
7503      * is not obligated to read and process their declarations;
7504      * for such documents, the rule that an entity must be
7505      * declared is a well-formedness constraint only if
7506      * standalone='yes'.
7507      */
7508     if (ent == NULL) {
7509 	if ((ctxt->standalone == 1) ||
7510 	    ((ctxt->hasExternalSubset == 0) &&
7511 	     (ctxt->hasPErefs == 0))) {
7512 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7513 		     "Entity '%s' not defined\n", name);
7514 	} else {
7515 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7516 			  "Entity '%s' not defined\n",
7517 			  name);
7518 	}
7519 	/* TODO ? check regressions ctxt->valid = 0; */
7520     }
7521 
7522     /*
7523      * [ WFC: Parsed Entity ]
7524      * An entity reference must not contain the name of an
7525      * unparsed entity
7526      */
7527     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7528 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7529 		 "Entity reference to unparsed entity %s\n", name);
7530     }
7531 
7532     /*
7533      * [ WFC: No External Entity References ]
7534      * Attribute values cannot contain direct or indirect
7535      * entity references to external entities.
7536      */
7537     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7538 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7539 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7540 	 "Attribute references external entity '%s'\n", name);
7541     }
7542     /*
7543      * [ WFC: No < in Attribute Values ]
7544      * The replacement text of any entity referred to directly or
7545      * indirectly in an attribute value (other than "&lt;") must
7546      * not contain a <.
7547      */
7548     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7549 	     (ent != NULL) && (ent->content != NULL) &&
7550 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7551 	     (xmlStrchr(ent->content, '<'))) {
7552 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7553      "'<' in entity '%s' is not allowed in attributes values\n",
7554 			  name);
7555     }
7556 
7557     /*
7558      * Internal check, no parameter entities here ...
7559      */
7560     else {
7561 	switch (ent->etype) {
7562 	    case XML_INTERNAL_PARAMETER_ENTITY:
7563 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7564 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7565 	     "Attempt to reference the parameter entity '%s'\n",
7566 				  name);
7567 	    break;
7568 	    default:
7569 	    break;
7570 	}
7571     }
7572 
7573     /*
7574      * [ WFC: No Recursion ]
7575      * A parsed entity must not contain a recursive reference
7576      * to itself, either directly or indirectly.
7577      * Done somewhere else
7578      */
7579 
7580     xmlFree(name);
7581     *str = ptr;
7582     return(ent);
7583 }
7584 
7585 /**
7586  * xmlParsePEReference:
7587  * @ctxt:  an XML parser context
7588  *
7589  * parse PEReference declarations
7590  * The entity content is handled directly by pushing its content as
7591  * a new input stream.
7592  *
7593  * [69] PEReference ::= '%' Name ';'
7594  *
7595  * [ WFC: No Recursion ]
7596  * A parsed entity must not contain a recursive
7597  * reference to itself, either directly or indirectly.
7598  *
7599  * [ WFC: Entity Declared ]
7600  * In a document without any DTD, a document with only an internal DTD
7601  * subset which contains no parameter entity references, or a document
7602  * with "standalone='yes'", ...  ... The declaration of a parameter
7603  * entity must precede any reference to it...
7604  *
7605  * [ VC: Entity Declared ]
7606  * In a document with an external subset or external parameter entities
7607  * with "standalone='no'", ...  ... The declaration of a parameter entity
7608  * must precede any reference to it...
7609  *
7610  * [ WFC: In DTD ]
7611  * Parameter-entity references may only appear in the DTD.
7612  * NOTE: misleading but this is handled.
7613  */
7614 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7615 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7616 {
7617     const xmlChar *name;
7618     xmlEntityPtr entity = NULL;
7619     xmlParserInputPtr input;
7620 
7621     if (RAW != '%')
7622         return;
7623     NEXT;
7624     name = xmlParseName(ctxt);
7625     if (name == NULL) {
7626 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7627 		       "xmlParsePEReference: no name\n");
7628 	return;
7629     }
7630     if (RAW != ';') {
7631 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7632         return;
7633     }
7634 
7635     NEXT;
7636 
7637     /*
7638      * Increate the number of entity references parsed
7639      */
7640     ctxt->nbentities++;
7641 
7642     /*
7643      * Request the entity from SAX
7644      */
7645     if ((ctxt->sax != NULL) &&
7646 	(ctxt->sax->getParameterEntity != NULL))
7647 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7648 					       name);
7649     if (entity == NULL) {
7650 	/*
7651 	 * [ WFC: Entity Declared ]
7652 	 * In a document without any DTD, a document with only an
7653 	 * internal DTD subset which contains no parameter entity
7654 	 * references, or a document with "standalone='yes'", ...
7655 	 * ... The declaration of a parameter entity must precede
7656 	 * any reference to it...
7657 	 */
7658 	if ((ctxt->standalone == 1) ||
7659 	    ((ctxt->hasExternalSubset == 0) &&
7660 	     (ctxt->hasPErefs == 0))) {
7661 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7662 			      "PEReference: %%%s; not found\n",
7663 			      name);
7664 	} else {
7665 	    /*
7666 	     * [ VC: Entity Declared ]
7667 	     * In a document with an external subset or external
7668 	     * parameter entities with "standalone='no'", ...
7669 	     * ... The declaration of a parameter entity must
7670 	     * precede any reference to it...
7671 	     */
7672 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7673 			  "PEReference: %%%s; not found\n",
7674 			  name, NULL);
7675 	    ctxt->valid = 0;
7676 	}
7677     } else {
7678 	/*
7679 	 * Internal checking in case the entity quest barfed
7680 	 */
7681 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7682 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7683 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 		  "Internal: %%%s; is not a parameter entity\n",
7685 			  name, NULL);
7686 	} else if (ctxt->input->free != deallocblankswrapper) {
7687 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7688 	    if (xmlPushInput(ctxt, input) < 0)
7689 		return;
7690 	} else {
7691 	    /*
7692 	     * TODO !!!
7693 	     * handle the extra spaces added before and after
7694 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
7695 	     */
7696 	    input = xmlNewEntityInputStream(ctxt, entity);
7697 	    if (xmlPushInput(ctxt, input) < 0)
7698 		return;
7699 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7700 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7701 		(IS_BLANK_CH(NXT(5)))) {
7702 		xmlParseTextDecl(ctxt);
7703 		if (ctxt->errNo ==
7704 		    XML_ERR_UNSUPPORTED_ENCODING) {
7705 		    /*
7706 		     * The XML REC instructs us to stop parsing
7707 		     * right here
7708 		     */
7709 		    ctxt->instate = XML_PARSER_EOF;
7710 		    return;
7711 		}
7712 	    }
7713 	}
7714     }
7715     ctxt->hasPErefs = 1;
7716 }
7717 
7718 /**
7719  * xmlLoadEntityContent:
7720  * @ctxt:  an XML parser context
7721  * @entity: an unloaded system entity
7722  *
7723  * Load the original content of the given system entity from the
7724  * ExternalID/SystemID given. This is to be used for Included in Literal
7725  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7726  *
7727  * Returns 0 in case of success and -1 in case of failure
7728  */
7729 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7730 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7731     xmlParserInputPtr input;
7732     xmlBufferPtr buf;
7733     int l, c;
7734     int count = 0;
7735 
7736     if ((ctxt == NULL) || (entity == NULL) ||
7737         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7738 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7739 	(entity->content != NULL)) {
7740 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7741 	            "xmlLoadEntityContent parameter error");
7742         return(-1);
7743     }
7744 
7745     if (xmlParserDebugEntities)
7746 	xmlGenericError(xmlGenericErrorContext,
7747 		"Reading %s entity content input\n", entity->name);
7748 
7749     buf = xmlBufferCreate();
7750     if (buf == NULL) {
7751 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 	            "xmlLoadEntityContent parameter error");
7753         return(-1);
7754     }
7755 
7756     input = xmlNewEntityInputStream(ctxt, entity);
7757     if (input == NULL) {
7758 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7759 	            "xmlLoadEntityContent input error");
7760 	xmlBufferFree(buf);
7761         return(-1);
7762     }
7763 
7764     /*
7765      * Push the entity as the current input, read char by char
7766      * saving to the buffer until the end of the entity or an error
7767      */
7768     if (xmlPushInput(ctxt, input) < 0) {
7769         xmlBufferFree(buf);
7770 	return(-1);
7771     }
7772 
7773     GROW;
7774     c = CUR_CHAR(l);
7775     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7776            (IS_CHAR(c))) {
7777         xmlBufferAdd(buf, ctxt->input->cur, l);
7778 	if (count++ > 100) {
7779 	    count = 0;
7780 	    GROW;
7781 	}
7782 	NEXTL(l);
7783 	c = CUR_CHAR(l);
7784     }
7785 
7786     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7787         xmlPopInput(ctxt);
7788     } else if (!IS_CHAR(c)) {
7789         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7790                           "xmlLoadEntityContent: invalid char value %d\n",
7791 	                  c);
7792 	xmlBufferFree(buf);
7793 	return(-1);
7794     }
7795     entity->content = buf->content;
7796     buf->content = NULL;
7797     xmlBufferFree(buf);
7798 
7799     return(0);
7800 }
7801 
7802 /**
7803  * xmlParseStringPEReference:
7804  * @ctxt:  an XML parser context
7805  * @str:  a pointer to an index in the string
7806  *
7807  * parse PEReference declarations
7808  *
7809  * [69] PEReference ::= '%' Name ';'
7810  *
7811  * [ WFC: No Recursion ]
7812  * A parsed entity must not contain a recursive
7813  * reference to itself, either directly or indirectly.
7814  *
7815  * [ WFC: Entity Declared ]
7816  * In a document without any DTD, a document with only an internal DTD
7817  * subset which contains no parameter entity references, or a document
7818  * with "standalone='yes'", ...  ... The declaration of a parameter
7819  * entity must precede any reference to it...
7820  *
7821  * [ VC: Entity Declared ]
7822  * In a document with an external subset or external parameter entities
7823  * with "standalone='no'", ...  ... The declaration of a parameter entity
7824  * must precede any reference to it...
7825  *
7826  * [ WFC: In DTD ]
7827  * Parameter-entity references may only appear in the DTD.
7828  * NOTE: misleading but this is handled.
7829  *
7830  * Returns the string of the entity content.
7831  *         str is updated to the current value of the index
7832  */
7833 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7834 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7835     const xmlChar *ptr;
7836     xmlChar cur;
7837     xmlChar *name;
7838     xmlEntityPtr entity = NULL;
7839 
7840     if ((str == NULL) || (*str == NULL)) return(NULL);
7841     ptr = *str;
7842     cur = *ptr;
7843     if (cur != '%')
7844         return(NULL);
7845     ptr++;
7846     name = xmlParseStringName(ctxt, &ptr);
7847     if (name == NULL) {
7848 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7849 		       "xmlParseStringPEReference: no name\n");
7850 	*str = ptr;
7851 	return(NULL);
7852     }
7853     cur = *ptr;
7854     if (cur != ';') {
7855 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7856 	xmlFree(name);
7857 	*str = ptr;
7858 	return(NULL);
7859     }
7860     ptr++;
7861 
7862     /*
7863      * Increate the number of entity references parsed
7864      */
7865     ctxt->nbentities++;
7866 
7867     /*
7868      * Request the entity from SAX
7869      */
7870     if ((ctxt->sax != NULL) &&
7871 	(ctxt->sax->getParameterEntity != NULL))
7872 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7873 					       name);
7874     if (entity == NULL) {
7875 	/*
7876 	 * [ WFC: Entity Declared ]
7877 	 * In a document without any DTD, a document with only an
7878 	 * internal DTD subset which contains no parameter entity
7879 	 * references, or a document with "standalone='yes'", ...
7880 	 * ... The declaration of a parameter entity must precede
7881 	 * any reference to it...
7882 	 */
7883 	if ((ctxt->standalone == 1) ||
7884 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7885 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7886 		 "PEReference: %%%s; not found\n", name);
7887 	} else {
7888 	    /*
7889 	     * [ VC: Entity Declared ]
7890 	     * In a document with an external subset or external
7891 	     * parameter entities with "standalone='no'", ...
7892 	     * ... The declaration of a parameter entity must
7893 	     * precede any reference to it...
7894 	     */
7895 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7896 			  "PEReference: %%%s; not found\n",
7897 			  name, NULL);
7898 	    ctxt->valid = 0;
7899 	}
7900     } else {
7901 	/*
7902 	 * Internal checking in case the entity quest barfed
7903 	 */
7904 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 			  "%%%s; is not a parameter entity\n",
7908 			  name, NULL);
7909 	}
7910     }
7911     ctxt->hasPErefs = 1;
7912     xmlFree(name);
7913     *str = ptr;
7914     return(entity);
7915 }
7916 
7917 /**
7918  * xmlParseDocTypeDecl:
7919  * @ctxt:  an XML parser context
7920  *
7921  * parse a DOCTYPE declaration
7922  *
7923  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7924  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7925  *
7926  * [ VC: Root Element Type ]
7927  * The Name in the document type declaration must match the element
7928  * type of the root element.
7929  */
7930 
7931 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7932 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7933     const xmlChar *name = NULL;
7934     xmlChar *ExternalID = NULL;
7935     xmlChar *URI = NULL;
7936 
7937     /*
7938      * We know that '<!DOCTYPE' has been detected.
7939      */
7940     SKIP(9);
7941 
7942     SKIP_BLANKS;
7943 
7944     /*
7945      * Parse the DOCTYPE name.
7946      */
7947     name = xmlParseName(ctxt);
7948     if (name == NULL) {
7949 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7950 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7951     }
7952     ctxt->intSubName = name;
7953 
7954     SKIP_BLANKS;
7955 
7956     /*
7957      * Check for SystemID and ExternalID
7958      */
7959     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7960 
7961     if ((URI != NULL) || (ExternalID != NULL)) {
7962         ctxt->hasExternalSubset = 1;
7963     }
7964     ctxt->extSubURI = URI;
7965     ctxt->extSubSystem = ExternalID;
7966 
7967     SKIP_BLANKS;
7968 
7969     /*
7970      * Create and update the internal subset.
7971      */
7972     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7973 	(!ctxt->disableSAX))
7974 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7975 
7976     /*
7977      * Is there any internal subset declarations ?
7978      * they are handled separately in xmlParseInternalSubset()
7979      */
7980     if (RAW == '[')
7981 	return;
7982 
7983     /*
7984      * We should be at the end of the DOCTYPE declaration.
7985      */
7986     if (RAW != '>') {
7987 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7988     }
7989     NEXT;
7990 }
7991 
7992 /**
7993  * xmlParseInternalSubset:
7994  * @ctxt:  an XML parser context
7995  *
7996  * parse the internal subset declaration
7997  *
7998  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7999  */
8000 
8001 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8002 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8003     /*
8004      * Is there any DTD definition ?
8005      */
8006     if (RAW == '[') {
8007         ctxt->instate = XML_PARSER_DTD;
8008         NEXT;
8009 	/*
8010 	 * Parse the succession of Markup declarations and
8011 	 * PEReferences.
8012 	 * Subsequence (markupdecl | PEReference | S)*
8013 	 */
8014 	while (RAW != ']') {
8015 	    const xmlChar *check = CUR_PTR;
8016 	    unsigned int cons = ctxt->input->consumed;
8017 
8018 	    SKIP_BLANKS;
8019 	    xmlParseMarkupDecl(ctxt);
8020 	    xmlParsePEReference(ctxt);
8021 
8022 	    /*
8023 	     * Pop-up of finished entities.
8024 	     */
8025 	    while ((RAW == 0) && (ctxt->inputNr > 1))
8026 		xmlPopInput(ctxt);
8027 
8028 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8029 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8030 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8031 		break;
8032 	    }
8033 	}
8034 	if (RAW == ']') {
8035 	    NEXT;
8036 	    SKIP_BLANKS;
8037 	}
8038     }
8039 
8040     /*
8041      * We should be at the end of the DOCTYPE declaration.
8042      */
8043     if (RAW != '>') {
8044 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8045     }
8046     NEXT;
8047 }
8048 
8049 #ifdef LIBXML_SAX1_ENABLED
8050 /**
8051  * xmlParseAttribute:
8052  * @ctxt:  an XML parser context
8053  * @value:  a xmlChar ** used to store the value of the attribute
8054  *
8055  * parse an attribute
8056  *
8057  * [41] Attribute ::= Name Eq AttValue
8058  *
8059  * [ WFC: No External Entity References ]
8060  * Attribute values cannot contain direct or indirect entity references
8061  * to external entities.
8062  *
8063  * [ WFC: No < in Attribute Values ]
8064  * The replacement text of any entity referred to directly or indirectly in
8065  * an attribute value (other than "&lt;") must not contain a <.
8066  *
8067  * [ VC: Attribute Value Type ]
8068  * The attribute must have been declared; the value must be of the type
8069  * declared for it.
8070  *
8071  * [25] Eq ::= S? '=' S?
8072  *
8073  * With namespace:
8074  *
8075  * [NS 11] Attribute ::= QName Eq AttValue
8076  *
8077  * Also the case QName == xmlns:??? is handled independently as a namespace
8078  * definition.
8079  *
8080  * Returns the attribute name, and the value in *value.
8081  */
8082 
8083 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8084 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8085     const xmlChar *name;
8086     xmlChar *val;
8087 
8088     *value = NULL;
8089     GROW;
8090     name = xmlParseName(ctxt);
8091     if (name == NULL) {
8092 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8093 	               "error parsing attribute name\n");
8094         return(NULL);
8095     }
8096 
8097     /*
8098      * read the value
8099      */
8100     SKIP_BLANKS;
8101     if (RAW == '=') {
8102         NEXT;
8103 	SKIP_BLANKS;
8104 	val = xmlParseAttValue(ctxt);
8105 	ctxt->instate = XML_PARSER_CONTENT;
8106     } else {
8107 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8108 	       "Specification mandate value for attribute %s\n", name);
8109 	return(NULL);
8110     }
8111 
8112     /*
8113      * Check that xml:lang conforms to the specification
8114      * No more registered as an error, just generate a warning now
8115      * since this was deprecated in XML second edition
8116      */
8117     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8118 	if (!xmlCheckLanguageID(val)) {
8119 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8120 		          "Malformed value for xml:lang : %s\n",
8121 			  val, NULL);
8122 	}
8123     }
8124 
8125     /*
8126      * Check that xml:space conforms to the specification
8127      */
8128     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8129 	if (xmlStrEqual(val, BAD_CAST "default"))
8130 	    *(ctxt->space) = 0;
8131 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8132 	    *(ctxt->space) = 1;
8133 	else {
8134 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8136                                  val, NULL);
8137 	}
8138     }
8139 
8140     *value = val;
8141     return(name);
8142 }
8143 
8144 /**
8145  * xmlParseStartTag:
8146  * @ctxt:  an XML parser context
8147  *
8148  * parse a start of tag either for rule element or
8149  * EmptyElement. In both case we don't parse the tag closing chars.
8150  *
8151  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8152  *
8153  * [ WFC: Unique Att Spec ]
8154  * No attribute name may appear more than once in the same start-tag or
8155  * empty-element tag.
8156  *
8157  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8158  *
8159  * [ WFC: Unique Att Spec ]
8160  * No attribute name may appear more than once in the same start-tag or
8161  * empty-element tag.
8162  *
8163  * With namespace:
8164  *
8165  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8166  *
8167  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8168  *
8169  * Returns the element name parsed
8170  */
8171 
8172 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8173 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8174     const xmlChar *name;
8175     const xmlChar *attname;
8176     xmlChar *attvalue;
8177     const xmlChar **atts = ctxt->atts;
8178     int nbatts = 0;
8179     int maxatts = ctxt->maxatts;
8180     int i;
8181 
8182     if (RAW != '<') return(NULL);
8183     NEXT1;
8184 
8185     name = xmlParseName(ctxt);
8186     if (name == NULL) {
8187 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188 	     "xmlParseStartTag: invalid element name\n");
8189         return(NULL);
8190     }
8191 
8192     /*
8193      * Now parse the attributes, it ends up with the ending
8194      *
8195      * (S Attribute)* S?
8196      */
8197     SKIP_BLANKS;
8198     GROW;
8199 
8200     while ((RAW != '>') &&
8201 	   ((RAW != '/') || (NXT(1) != '>')) &&
8202 	   (IS_BYTE_CHAR(RAW))) {
8203 	const xmlChar *q = CUR_PTR;
8204 	unsigned int cons = ctxt->input->consumed;
8205 
8206 	attname = xmlParseAttribute(ctxt, &attvalue);
8207         if ((attname != NULL) && (attvalue != NULL)) {
8208 	    /*
8209 	     * [ WFC: Unique Att Spec ]
8210 	     * No attribute name may appear more than once in the same
8211 	     * start-tag or empty-element tag.
8212 	     */
8213 	    for (i = 0; i < nbatts;i += 2) {
8214 	        if (xmlStrEqual(atts[i], attname)) {
8215 		    xmlErrAttributeDup(ctxt, NULL, attname);
8216 		    xmlFree(attvalue);
8217 		    goto failed;
8218 		}
8219 	    }
8220 	    /*
8221 	     * Add the pair to atts
8222 	     */
8223 	    if (atts == NULL) {
8224 	        maxatts = 22; /* allow for 10 attrs by default */
8225 	        atts = (const xmlChar **)
8226 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8227 		if (atts == NULL) {
8228 		    xmlErrMemory(ctxt, NULL);
8229 		    if (attvalue != NULL)
8230 			xmlFree(attvalue);
8231 		    goto failed;
8232 		}
8233 		ctxt->atts = atts;
8234 		ctxt->maxatts = maxatts;
8235 	    } else if (nbatts + 4 > maxatts) {
8236 	        const xmlChar **n;
8237 
8238 	        maxatts *= 2;
8239 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8240 					     maxatts * sizeof(const xmlChar *));
8241 		if (n == NULL) {
8242 		    xmlErrMemory(ctxt, NULL);
8243 		    if (attvalue != NULL)
8244 			xmlFree(attvalue);
8245 		    goto failed;
8246 		}
8247 		atts = n;
8248 		ctxt->atts = atts;
8249 		ctxt->maxatts = maxatts;
8250 	    }
8251 	    atts[nbatts++] = attname;
8252 	    atts[nbatts++] = attvalue;
8253 	    atts[nbatts] = NULL;
8254 	    atts[nbatts + 1] = NULL;
8255 	} else {
8256 	    if (attvalue != NULL)
8257 		xmlFree(attvalue);
8258 	}
8259 
8260 failed:
8261 
8262 	GROW
8263 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8264 	    break;
8265 	if (!IS_BLANK_CH(RAW)) {
8266 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8267 			   "attributes construct error\n");
8268 	}
8269 	SKIP_BLANKS;
8270         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8271             (attname == NULL) && (attvalue == NULL)) {
8272 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8273 			   "xmlParseStartTag: problem parsing attributes\n");
8274 	    break;
8275 	}
8276 	SHRINK;
8277         GROW;
8278     }
8279 
8280     /*
8281      * SAX: Start of Element !
8282      */
8283     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8284 	(!ctxt->disableSAX)) {
8285 	if (nbatts > 0)
8286 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8287 	else
8288 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8289     }
8290 
8291     if (atts != NULL) {
8292         /* Free only the content strings */
8293         for (i = 1;i < nbatts;i+=2)
8294 	    if (atts[i] != NULL)
8295 	       xmlFree((xmlChar *) atts[i]);
8296     }
8297     return(name);
8298 }
8299 
8300 /**
8301  * xmlParseEndTag1:
8302  * @ctxt:  an XML parser context
8303  * @line:  line of the start tag
8304  * @nsNr:  number of namespaces on the start tag
8305  *
8306  * parse an end of tag
8307  *
8308  * [42] ETag ::= '</' Name S? '>'
8309  *
8310  * With namespace
8311  *
8312  * [NS 9] ETag ::= '</' QName S? '>'
8313  */
8314 
8315 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8316 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8317     const xmlChar *name;
8318 
8319     GROW;
8320     if ((RAW != '<') || (NXT(1) != '/')) {
8321 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8322 		       "xmlParseEndTag: '</' not found\n");
8323 	return;
8324     }
8325     SKIP(2);
8326 
8327     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8328 
8329     /*
8330      * We should definitely be at the ending "S? '>'" part
8331      */
8332     GROW;
8333     SKIP_BLANKS;
8334     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8335 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8336     } else
8337 	NEXT1;
8338 
8339     /*
8340      * [ WFC: Element Type Match ]
8341      * The Name in an element's end-tag must match the element type in the
8342      * start-tag.
8343      *
8344      */
8345     if (name != (xmlChar*)1) {
8346         if (name == NULL) name = BAD_CAST "unparseable";
8347         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8348 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8349 		                ctxt->name, line, name);
8350     }
8351 
8352     /*
8353      * SAX: End of Tag
8354      */
8355     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8356 	(!ctxt->disableSAX))
8357         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8358 
8359     namePop(ctxt);
8360     spacePop(ctxt);
8361     return;
8362 }
8363 
8364 /**
8365  * xmlParseEndTag:
8366  * @ctxt:  an XML parser context
8367  *
8368  * parse an end of tag
8369  *
8370  * [42] ETag ::= '</' Name S? '>'
8371  *
8372  * With namespace
8373  *
8374  * [NS 9] ETag ::= '</' QName S? '>'
8375  */
8376 
8377 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8378 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8379     xmlParseEndTag1(ctxt, 0);
8380 }
8381 #endif /* LIBXML_SAX1_ENABLED */
8382 
8383 /************************************************************************
8384  *									*
8385  *		      SAX 2 specific operations				*
8386  *									*
8387  ************************************************************************/
8388 
8389 /*
8390  * xmlGetNamespace:
8391  * @ctxt:  an XML parser context
8392  * @prefix:  the prefix to lookup
8393  *
8394  * Lookup the namespace name for the @prefix (which ca be NULL)
8395  * The prefix must come from the @ctxt->dict dictionnary
8396  *
8397  * Returns the namespace name or NULL if not bound
8398  */
8399 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8400 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8401     int i;
8402 
8403     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8404     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8405         if (ctxt->nsTab[i] == prefix) {
8406 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8407 	        return(NULL);
8408 	    return(ctxt->nsTab[i + 1]);
8409 	}
8410     return(NULL);
8411 }
8412 
8413 /**
8414  * xmlParseQName:
8415  * @ctxt:  an XML parser context
8416  * @prefix:  pointer to store the prefix part
8417  *
8418  * parse an XML Namespace QName
8419  *
8420  * [6]  QName  ::= (Prefix ':')? LocalPart
8421  * [7]  Prefix  ::= NCName
8422  * [8]  LocalPart  ::= NCName
8423  *
8424  * Returns the Name parsed or NULL
8425  */
8426 
8427 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8428 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8429     const xmlChar *l, *p;
8430 
8431     GROW;
8432 
8433     l = xmlParseNCName(ctxt);
8434     if (l == NULL) {
8435         if (CUR == ':') {
8436 	    l = xmlParseName(ctxt);
8437 	    if (l != NULL) {
8438 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8439 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8440 		*prefix = NULL;
8441 		return(l);
8442 	    }
8443 	}
8444         return(NULL);
8445     }
8446     if (CUR == ':') {
8447         NEXT;
8448 	p = l;
8449 	l = xmlParseNCName(ctxt);
8450 	if (l == NULL) {
8451 	    xmlChar *tmp;
8452 
8453             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8454 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8455 	    l = xmlParseNmtoken(ctxt);
8456 	    if (l == NULL)
8457 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8458 	    else {
8459 		tmp = xmlBuildQName(l, p, NULL, 0);
8460 		xmlFree((char *)l);
8461 	    }
8462 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8463 	    if (tmp != NULL) xmlFree(tmp);
8464 	    *prefix = NULL;
8465 	    return(p);
8466 	}
8467 	if (CUR == ':') {
8468 	    xmlChar *tmp;
8469 
8470             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8471 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8472 	    NEXT;
8473 	    tmp = (xmlChar *) xmlParseName(ctxt);
8474 	    if (tmp != NULL) {
8475 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8476 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8477 		if (tmp != NULL) xmlFree(tmp);
8478 		*prefix = p;
8479 		return(l);
8480 	    }
8481 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8482 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8483 	    if (tmp != NULL) xmlFree(tmp);
8484 	    *prefix = p;
8485 	    return(l);
8486 	}
8487 	*prefix = p;
8488     } else
8489         *prefix = NULL;
8490     return(l);
8491 }
8492 
8493 /**
8494  * xmlParseQNameAndCompare:
8495  * @ctxt:  an XML parser context
8496  * @name:  the localname
8497  * @prefix:  the prefix, if any.
8498  *
8499  * parse an XML name and compares for match
8500  * (specialized for endtag parsing)
8501  *
8502  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8503  * and the name for mismatch
8504  */
8505 
8506 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8507 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8508                         xmlChar const *prefix) {
8509     const xmlChar *cmp;
8510     const xmlChar *in;
8511     const xmlChar *ret;
8512     const xmlChar *prefix2;
8513 
8514     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8515 
8516     GROW;
8517     in = ctxt->input->cur;
8518 
8519     cmp = prefix;
8520     while (*in != 0 && *in == *cmp) {
8521     	++in;
8522 	++cmp;
8523     }
8524     if ((*cmp == 0) && (*in == ':')) {
8525         in++;
8526 	cmp = name;
8527 	while (*in != 0 && *in == *cmp) {
8528 	    ++in;
8529 	    ++cmp;
8530 	}
8531 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8532 	    /* success */
8533 	    ctxt->input->cur = in;
8534 	    return((const xmlChar*) 1);
8535 	}
8536     }
8537     /*
8538      * all strings coms from the dictionary, equality can be done directly
8539      */
8540     ret = xmlParseQName (ctxt, &prefix2);
8541     if ((ret == name) && (prefix == prefix2))
8542 	return((const xmlChar*) 1);
8543     return ret;
8544 }
8545 
8546 /**
8547  * xmlParseAttValueInternal:
8548  * @ctxt:  an XML parser context
8549  * @len:  attribute len result
8550  * @alloc:  whether the attribute was reallocated as a new string
8551  * @normalize:  if 1 then further non-CDATA normalization must be done
8552  *
8553  * parse a value for an attribute.
8554  * NOTE: if no normalization is needed, the routine will return pointers
8555  *       directly from the data buffer.
8556  *
8557  * 3.3.3 Attribute-Value Normalization:
8558  * Before the value of an attribute is passed to the application or
8559  * checked for validity, the XML processor must normalize it as follows:
8560  * - a character reference is processed by appending the referenced
8561  *   character to the attribute value
8562  * - an entity reference is processed by recursively processing the
8563  *   replacement text of the entity
8564  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8565  *   appending #x20 to the normalized value, except that only a single
8566  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8567  *   parsed entity or the literal entity value of an internal parsed entity
8568  * - other characters are processed by appending them to the normalized value
8569  * If the declared value is not CDATA, then the XML processor must further
8570  * process the normalized attribute value by discarding any leading and
8571  * trailing space (#x20) characters, and by replacing sequences of space
8572  * (#x20) characters by a single space (#x20) character.
8573  * All attributes for which no declaration has been read should be treated
8574  * by a non-validating parser as if declared CDATA.
8575  *
8576  * Returns the AttValue parsed or NULL. The value has to be freed by the
8577  *     caller if it was copied, this can be detected by val[*len] == 0.
8578  */
8579 
8580 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8581 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8582                          int normalize)
8583 {
8584     xmlChar limit = 0;
8585     const xmlChar *in = NULL, *start, *end, *last;
8586     xmlChar *ret = NULL;
8587 
8588     GROW;
8589     in = (xmlChar *) CUR_PTR;
8590     if (*in != '"' && *in != '\'') {
8591         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8592         return (NULL);
8593     }
8594     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8595 
8596     /*
8597      * try to handle in this routine the most common case where no
8598      * allocation of a new string is required and where content is
8599      * pure ASCII.
8600      */
8601     limit = *in++;
8602     end = ctxt->input->end;
8603     start = in;
8604     if (in >= end) {
8605         const xmlChar *oldbase = ctxt->input->base;
8606 	GROW;
8607 	if (oldbase != ctxt->input->base) {
8608 	    long delta = ctxt->input->base - oldbase;
8609 	    start = start + delta;
8610 	    in = in + delta;
8611 	}
8612 	end = ctxt->input->end;
8613     }
8614     if (normalize) {
8615         /*
8616 	 * Skip any leading spaces
8617 	 */
8618 	while ((in < end) && (*in != limit) &&
8619 	       ((*in == 0x20) || (*in == 0x9) ||
8620 	        (*in == 0xA) || (*in == 0xD))) {
8621 	    in++;
8622 	    start = in;
8623 	    if (in >= end) {
8624 		const xmlChar *oldbase = ctxt->input->base;
8625 		GROW;
8626 		if (oldbase != ctxt->input->base) {
8627 		    long delta = ctxt->input->base - oldbase;
8628 		    start = start + delta;
8629 		    in = in + delta;
8630 		}
8631 		end = ctxt->input->end;
8632 	    }
8633 	}
8634 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8635 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8636 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8637 	    if (in >= end) {
8638 		const xmlChar *oldbase = ctxt->input->base;
8639 		GROW;
8640 		if (oldbase != ctxt->input->base) {
8641 		    long delta = ctxt->input->base - oldbase;
8642 		    start = start + delta;
8643 		    in = in + delta;
8644 		}
8645 		end = ctxt->input->end;
8646 	    }
8647 	}
8648 	last = in;
8649 	/*
8650 	 * skip the trailing blanks
8651 	 */
8652 	while ((last[-1] == 0x20) && (last > start)) last--;
8653 	while ((in < end) && (*in != limit) &&
8654 	       ((*in == 0x20) || (*in == 0x9) ||
8655 	        (*in == 0xA) || (*in == 0xD))) {
8656 	    in++;
8657 	    if (in >= end) {
8658 		const xmlChar *oldbase = ctxt->input->base;
8659 		GROW;
8660 		if (oldbase != ctxt->input->base) {
8661 		    long delta = ctxt->input->base - oldbase;
8662 		    start = start + delta;
8663 		    in = in + delta;
8664 		    last = last + delta;
8665 		}
8666 		end = ctxt->input->end;
8667 	    }
8668 	}
8669 	if (*in != limit) goto need_complex;
8670     } else {
8671 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8672 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8673 	    in++;
8674 	    if (in >= end) {
8675 		const xmlChar *oldbase = ctxt->input->base;
8676 		GROW;
8677 		if (oldbase != ctxt->input->base) {
8678 		    long delta = ctxt->input->base - oldbase;
8679 		    start = start + delta;
8680 		    in = in + delta;
8681 		}
8682 		end = ctxt->input->end;
8683 	    }
8684 	}
8685 	last = in;
8686 	if (*in != limit) goto need_complex;
8687     }
8688     in++;
8689     if (len != NULL) {
8690         *len = last - start;
8691         ret = (xmlChar *) start;
8692     } else {
8693         if (alloc) *alloc = 1;
8694         ret = xmlStrndup(start, last - start);
8695     }
8696     CUR_PTR = in;
8697     if (alloc) *alloc = 0;
8698     return ret;
8699 need_complex:
8700     if (alloc) *alloc = 1;
8701     return xmlParseAttValueComplex(ctxt, len, normalize);
8702 }
8703 
8704 /**
8705  * xmlParseAttribute2:
8706  * @ctxt:  an XML parser context
8707  * @pref:  the element prefix
8708  * @elem:  the element name
8709  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8710  * @value:  a xmlChar ** used to store the value of the attribute
8711  * @len:  an int * to save the length of the attribute
8712  * @alloc:  an int * to indicate if the attribute was allocated
8713  *
8714  * parse an attribute in the new SAX2 framework.
8715  *
8716  * Returns the attribute name, and the value in *value, .
8717  */
8718 
8719 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8720 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8721                    const xmlChar * pref, const xmlChar * elem,
8722                    const xmlChar ** prefix, xmlChar ** value,
8723                    int *len, int *alloc)
8724 {
8725     const xmlChar *name;
8726     xmlChar *val, *internal_val = NULL;
8727     int normalize = 0;
8728 
8729     *value = NULL;
8730     GROW;
8731     name = xmlParseQName(ctxt, prefix);
8732     if (name == NULL) {
8733         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8734                        "error parsing attribute name\n");
8735         return (NULL);
8736     }
8737 
8738     /*
8739      * get the type if needed
8740      */
8741     if (ctxt->attsSpecial != NULL) {
8742         int type;
8743 
8744         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8745                                             pref, elem, *prefix, name);
8746         if (type != 0)
8747             normalize = 1;
8748     }
8749 
8750     /*
8751      * read the value
8752      */
8753     SKIP_BLANKS;
8754     if (RAW == '=') {
8755         NEXT;
8756         SKIP_BLANKS;
8757         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8758 	if (normalize) {
8759 	    /*
8760 	     * Sometimes a second normalisation pass for spaces is needed
8761 	     * but that only happens if charrefs or entities refernces
8762 	     * have been used in the attribute value, i.e. the attribute
8763 	     * value have been extracted in an allocated string already.
8764 	     */
8765 	    if (*alloc) {
8766 	        const xmlChar *val2;
8767 
8768 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8769 		if ((val2 != NULL) && (val2 != val)) {
8770 		    xmlFree(val);
8771 		    val = (xmlChar *) val2;
8772 		}
8773 	    }
8774 	}
8775         ctxt->instate = XML_PARSER_CONTENT;
8776     } else {
8777         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8778                           "Specification mandate value for attribute %s\n",
8779                           name);
8780         return (NULL);
8781     }
8782 
8783     if (*prefix == ctxt->str_xml) {
8784         /*
8785          * Check that xml:lang conforms to the specification
8786          * No more registered as an error, just generate a warning now
8787          * since this was deprecated in XML second edition
8788          */
8789         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8790             internal_val = xmlStrndup(val, *len);
8791             if (!xmlCheckLanguageID(internal_val)) {
8792                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8793                               "Malformed value for xml:lang : %s\n",
8794                               internal_val, NULL);
8795             }
8796         }
8797 
8798         /*
8799          * Check that xml:space conforms to the specification
8800          */
8801         if (xmlStrEqual(name, BAD_CAST "space")) {
8802             internal_val = xmlStrndup(val, *len);
8803             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8804                 *(ctxt->space) = 0;
8805             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8806                 *(ctxt->space) = 1;
8807             else {
8808                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8809                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8810                               internal_val, NULL);
8811             }
8812         }
8813         if (internal_val) {
8814             xmlFree(internal_val);
8815         }
8816     }
8817 
8818     *value = val;
8819     return (name);
8820 }
8821 /**
8822  * xmlParseStartTag2:
8823  * @ctxt:  an XML parser context
8824  *
8825  * parse a start of tag either for rule element or
8826  * EmptyElement. In both case we don't parse the tag closing chars.
8827  * This routine is called when running SAX2 parsing
8828  *
8829  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8830  *
8831  * [ WFC: Unique Att Spec ]
8832  * No attribute name may appear more than once in the same start-tag or
8833  * empty-element tag.
8834  *
8835  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8836  *
8837  * [ WFC: Unique Att Spec ]
8838  * No attribute name may appear more than once in the same start-tag or
8839  * empty-element tag.
8840  *
8841  * With namespace:
8842  *
8843  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8844  *
8845  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8846  *
8847  * Returns the element name parsed
8848  */
8849 
8850 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8851 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8852                   const xmlChar **URI, int *tlen) {
8853     const xmlChar *localname;
8854     const xmlChar *prefix;
8855     const xmlChar *attname;
8856     const xmlChar *aprefix;
8857     const xmlChar *nsname;
8858     xmlChar *attvalue;
8859     const xmlChar **atts = ctxt->atts;
8860     int maxatts = ctxt->maxatts;
8861     int nratts, nbatts, nbdef;
8862     int i, j, nbNs, attval, oldline, oldcol;
8863     const xmlChar *base;
8864     unsigned long cur;
8865     int nsNr = ctxt->nsNr;
8866 
8867     if (RAW != '<') return(NULL);
8868     NEXT1;
8869 
8870     /*
8871      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8872      *       point since the attribute values may be stored as pointers to
8873      *       the buffer and calling SHRINK would destroy them !
8874      *       The Shrinking is only possible once the full set of attribute
8875      *       callbacks have been done.
8876      */
8877 reparse:
8878     SHRINK;
8879     base = ctxt->input->base;
8880     cur = ctxt->input->cur - ctxt->input->base;
8881     oldline = ctxt->input->line;
8882     oldcol = ctxt->input->col;
8883     nbatts = 0;
8884     nratts = 0;
8885     nbdef = 0;
8886     nbNs = 0;
8887     attval = 0;
8888     /* Forget any namespaces added during an earlier parse of this element. */
8889     ctxt->nsNr = nsNr;
8890 
8891     localname = xmlParseQName(ctxt, &prefix);
8892     if (localname == NULL) {
8893 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8894 		       "StartTag: invalid element name\n");
8895         return(NULL);
8896     }
8897     *tlen = ctxt->input->cur - ctxt->input->base - cur;
8898 
8899     /*
8900      * Now parse the attributes, it ends up with the ending
8901      *
8902      * (S Attribute)* S?
8903      */
8904     SKIP_BLANKS;
8905     GROW;
8906     if (ctxt->input->base != base) goto base_changed;
8907 
8908     while ((RAW != '>') &&
8909 	   ((RAW != '/') || (NXT(1) != '>')) &&
8910 	   (IS_BYTE_CHAR(RAW))) {
8911 	const xmlChar *q = CUR_PTR;
8912 	unsigned int cons = ctxt->input->consumed;
8913 	int len = -1, alloc = 0;
8914 
8915 	attname = xmlParseAttribute2(ctxt, prefix, localname,
8916 	                             &aprefix, &attvalue, &len, &alloc);
8917 	if (ctxt->input->base != base) {
8918 	    if ((attvalue != NULL) && (alloc != 0))
8919 	        xmlFree(attvalue);
8920 	    attvalue = NULL;
8921 	    goto base_changed;
8922 	}
8923         if ((attname != NULL) && (attvalue != NULL)) {
8924 	    if (len < 0) len = xmlStrlen(attvalue);
8925             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8926 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8927 		xmlURIPtr uri;
8928 
8929                 if (*URL != 0) {
8930 		    uri = xmlParseURI((const char *) URL);
8931 		    if (uri == NULL) {
8932 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8933 			         "xmlns: '%s' is not a valid URI\n",
8934 					   URL, NULL, NULL);
8935 		    } else {
8936 			if (uri->scheme == NULL) {
8937 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8938 				      "xmlns: URI %s is not absolute\n",
8939 				      URL, NULL, NULL);
8940 			}
8941 			xmlFreeURI(uri);
8942 		    }
8943 		    if (URL == ctxt->str_xml_ns) {
8944 			if (attname != ctxt->str_xml) {
8945 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8946 			 "xml namespace URI cannot be the default namespace\n",
8947 				     NULL, NULL, NULL);
8948 			}
8949 			goto skip_default_ns;
8950 		    }
8951 		    if ((len == 29) &&
8952 			(xmlStrEqual(URL,
8953 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8954 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8955 			     "reuse of the xmlns namespace name is forbidden\n",
8956 				 NULL, NULL, NULL);
8957 			goto skip_default_ns;
8958 		    }
8959 		}
8960 		/*
8961 		 * check that it's not a defined namespace
8962 		 */
8963 		for (j = 1;j <= nbNs;j++)
8964 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8965 			break;
8966 		if (j <= nbNs)
8967 		    xmlErrAttributeDup(ctxt, NULL, attname);
8968 		else
8969 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8970 skip_default_ns:
8971 		if (alloc != 0) xmlFree(attvalue);
8972 		SKIP_BLANKS;
8973 		continue;
8974 	    }
8975             if (aprefix == ctxt->str_xmlns) {
8976 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8977 		xmlURIPtr uri;
8978 
8979                 if (attname == ctxt->str_xml) {
8980 		    if (URL != ctxt->str_xml_ns) {
8981 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8982 			         "xml namespace prefix mapped to wrong URI\n",
8983 			         NULL, NULL, NULL);
8984 		    }
8985 		    /*
8986 		     * Do not keep a namespace definition node
8987 		     */
8988 		    goto skip_ns;
8989 		}
8990                 if (URL == ctxt->str_xml_ns) {
8991 		    if (attname != ctxt->str_xml) {
8992 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 			         "xml namespace URI mapped to wrong prefix\n",
8994 			         NULL, NULL, NULL);
8995 		    }
8996 		    goto skip_ns;
8997 		}
8998                 if (attname == ctxt->str_xmlns) {
8999 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9000 			     "redefinition of the xmlns prefix is forbidden\n",
9001 			     NULL, NULL, NULL);
9002 		    goto skip_ns;
9003 		}
9004 		if ((len == 29) &&
9005 		    (xmlStrEqual(URL,
9006 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9007 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9008 			     "reuse of the xmlns namespace name is forbidden\n",
9009 			     NULL, NULL, NULL);
9010 		    goto skip_ns;
9011 		}
9012 		if ((URL == NULL) || (URL[0] == 0)) {
9013 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9014 		             "xmlns:%s: Empty XML namespace is not allowed\n",
9015 			          attname, NULL, NULL);
9016 		    goto skip_ns;
9017 		} else {
9018 		    uri = xmlParseURI((const char *) URL);
9019 		    if (uri == NULL) {
9020 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9021 			     "xmlns:%s: '%s' is not a valid URI\n",
9022 					   attname, URL, NULL);
9023 		    } else {
9024 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9025 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9026 				      "xmlns:%s: URI %s is not absolute\n",
9027 				      attname, URL, NULL);
9028 			}
9029 			xmlFreeURI(uri);
9030 		    }
9031 		}
9032 
9033 		/*
9034 		 * check that it's not a defined namespace
9035 		 */
9036 		for (j = 1;j <= nbNs;j++)
9037 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9038 			break;
9039 		if (j <= nbNs)
9040 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9041 		else
9042 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9043 skip_ns:
9044 		if (alloc != 0) xmlFree(attvalue);
9045 		SKIP_BLANKS;
9046 		if (ctxt->input->base != base) goto base_changed;
9047 		continue;
9048 	    }
9049 
9050 	    /*
9051 	     * Add the pair to atts
9052 	     */
9053 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9054 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9055 		    if (attvalue[len] == 0)
9056 			xmlFree(attvalue);
9057 		    goto failed;
9058 		}
9059 	        maxatts = ctxt->maxatts;
9060 		atts = ctxt->atts;
9061 	    }
9062 	    ctxt->attallocs[nratts++] = alloc;
9063 	    atts[nbatts++] = attname;
9064 	    atts[nbatts++] = aprefix;
9065 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9066 	    atts[nbatts++] = attvalue;
9067 	    attvalue += len;
9068 	    atts[nbatts++] = attvalue;
9069 	    /*
9070 	     * tag if some deallocation is needed
9071 	     */
9072 	    if (alloc != 0) attval = 1;
9073 	} else {
9074 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9075 		xmlFree(attvalue);
9076 	}
9077 
9078 failed:
9079 
9080 	GROW
9081 	if (ctxt->input->base != base) goto base_changed;
9082 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9083 	    break;
9084 	if (!IS_BLANK_CH(RAW)) {
9085 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9086 			   "attributes construct error\n");
9087 	    break;
9088 	}
9089 	SKIP_BLANKS;
9090         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9091             (attname == NULL) && (attvalue == NULL)) {
9092 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9093 	         "xmlParseStartTag: problem parsing attributes\n");
9094 	    break;
9095 	}
9096         GROW;
9097 	if (ctxt->input->base != base) goto base_changed;
9098     }
9099 
9100     /*
9101      * The attributes defaulting
9102      */
9103     if (ctxt->attsDefault != NULL) {
9104         xmlDefAttrsPtr defaults;
9105 
9106 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9107 	if (defaults != NULL) {
9108 	    for (i = 0;i < defaults->nbAttrs;i++) {
9109 	        attname = defaults->values[5 * i];
9110 		aprefix = defaults->values[5 * i + 1];
9111 
9112                 /*
9113 		 * special work for namespaces defaulted defs
9114 		 */
9115 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9116 		    /*
9117 		     * check that it's not a defined namespace
9118 		     */
9119 		    for (j = 1;j <= nbNs;j++)
9120 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9121 			    break;
9122 	            if (j <= nbNs) continue;
9123 
9124 		    nsname = xmlGetNamespace(ctxt, NULL);
9125 		    if (nsname != defaults->values[5 * i + 2]) {
9126 			if (nsPush(ctxt, NULL,
9127 			           defaults->values[5 * i + 2]) > 0)
9128 			    nbNs++;
9129 		    }
9130 		} else if (aprefix == ctxt->str_xmlns) {
9131 		    /*
9132 		     * check that it's not a defined namespace
9133 		     */
9134 		    for (j = 1;j <= nbNs;j++)
9135 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9136 			    break;
9137 	            if (j <= nbNs) continue;
9138 
9139 		    nsname = xmlGetNamespace(ctxt, attname);
9140 		    if (nsname != defaults->values[2]) {
9141 			if (nsPush(ctxt, attname,
9142 			           defaults->values[5 * i + 2]) > 0)
9143 			    nbNs++;
9144 		    }
9145 		} else {
9146 		    /*
9147 		     * check that it's not a defined attribute
9148 		     */
9149 		    for (j = 0;j < nbatts;j+=5) {
9150 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9151 			    break;
9152 		    }
9153 		    if (j < nbatts) continue;
9154 
9155 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9156 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9157 			    return(NULL);
9158 			}
9159 			maxatts = ctxt->maxatts;
9160 			atts = ctxt->atts;
9161 		    }
9162 		    atts[nbatts++] = attname;
9163 		    atts[nbatts++] = aprefix;
9164 		    if (aprefix == NULL)
9165 			atts[nbatts++] = NULL;
9166 		    else
9167 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9168 		    atts[nbatts++] = defaults->values[5 * i + 2];
9169 		    atts[nbatts++] = defaults->values[5 * i + 3];
9170 		    if ((ctxt->standalone == 1) &&
9171 		        (defaults->values[5 * i + 4] != NULL)) {
9172 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9173 	  "standalone: attribute %s on %s defaulted from external subset\n",
9174 	                                 attname, localname);
9175 		    }
9176 		    nbdef++;
9177 		}
9178 	    }
9179 	}
9180     }
9181 
9182     /*
9183      * The attributes checkings
9184      */
9185     for (i = 0; i < nbatts;i += 5) {
9186         /*
9187 	* The default namespace does not apply to attribute names.
9188 	*/
9189 	if (atts[i + 1] != NULL) {
9190 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9191 	    if (nsname == NULL) {
9192 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9193 		    "Namespace prefix %s for %s on %s is not defined\n",
9194 		    atts[i + 1], atts[i], localname);
9195 	    }
9196 	    atts[i + 2] = nsname;
9197 	} else
9198 	    nsname = NULL;
9199 	/*
9200 	 * [ WFC: Unique Att Spec ]
9201 	 * No attribute name may appear more than once in the same
9202 	 * start-tag or empty-element tag.
9203 	 * As extended by the Namespace in XML REC.
9204 	 */
9205         for (j = 0; j < i;j += 5) {
9206 	    if (atts[i] == atts[j]) {
9207 	        if (atts[i+1] == atts[j+1]) {
9208 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9209 		    break;
9210 		}
9211 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9212 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9213 			     "Namespaced Attribute %s in '%s' redefined\n",
9214 			     atts[i], nsname, NULL);
9215 		    break;
9216 		}
9217 	    }
9218 	}
9219     }
9220 
9221     nsname = xmlGetNamespace(ctxt, prefix);
9222     if ((prefix != NULL) && (nsname == NULL)) {
9223 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9224 	         "Namespace prefix %s on %s is not defined\n",
9225 		 prefix, localname, NULL);
9226     }
9227     *pref = prefix;
9228     *URI = nsname;
9229 
9230     /*
9231      * SAX: Start of Element !
9232      */
9233     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9234 	(!ctxt->disableSAX)) {
9235 	if (nbNs > 0)
9236 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9237 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9238 			  nbatts / 5, nbdef, atts);
9239 	else
9240 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9241 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9242     }
9243 
9244     /*
9245      * Free up attribute allocated strings if needed
9246      */
9247     if (attval != 0) {
9248 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9249 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9250 	        xmlFree((xmlChar *) atts[i]);
9251     }
9252 
9253     return(localname);
9254 
9255 base_changed:
9256     /*
9257      * the attribute strings are valid iif the base didn't changed
9258      */
9259     if (attval != 0) {
9260 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9261 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9262 	        xmlFree((xmlChar *) atts[i]);
9263     }
9264     ctxt->input->cur = ctxt->input->base + cur;
9265     ctxt->input->line = oldline;
9266     ctxt->input->col = oldcol;
9267     if (ctxt->wellFormed == 1) {
9268 	goto reparse;
9269     }
9270     return(NULL);
9271 }
9272 
9273 /**
9274  * xmlParseEndTag2:
9275  * @ctxt:  an XML parser context
9276  * @line:  line of the start tag
9277  * @nsNr:  number of namespaces on the start tag
9278  *
9279  * parse an end of tag
9280  *
9281  * [42] ETag ::= '</' Name S? '>'
9282  *
9283  * With namespace
9284  *
9285  * [NS 9] ETag ::= '</' QName S? '>'
9286  */
9287 
9288 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9289 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9290                 const xmlChar *URI, int line, int nsNr, int tlen) {
9291     const xmlChar *name;
9292 
9293     GROW;
9294     if ((RAW != '<') || (NXT(1) != '/')) {
9295 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9296 	return;
9297     }
9298     SKIP(2);
9299 
9300     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9301         if (ctxt->input->cur[tlen] == '>') {
9302 	    ctxt->input->cur += tlen + 1;
9303 	    goto done;
9304 	}
9305 	ctxt->input->cur += tlen;
9306 	name = (xmlChar*)1;
9307     } else {
9308 	if (prefix == NULL)
9309 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9310 	else
9311 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9312     }
9313 
9314     /*
9315      * We should definitely be at the ending "S? '>'" part
9316      */
9317     GROW;
9318     SKIP_BLANKS;
9319     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9320 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9321     } else
9322 	NEXT1;
9323 
9324     /*
9325      * [ WFC: Element Type Match ]
9326      * The Name in an element's end-tag must match the element type in the
9327      * start-tag.
9328      *
9329      */
9330     if (name != (xmlChar*)1) {
9331         if (name == NULL) name = BAD_CAST "unparseable";
9332         if ((line == 0) && (ctxt->node != NULL))
9333             line = ctxt->node->line;
9334         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9335 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9336 		                ctxt->name, line, name);
9337     }
9338 
9339     /*
9340      * SAX: End of Tag
9341      */
9342 done:
9343     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9344 	(!ctxt->disableSAX))
9345 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9346 
9347     spacePop(ctxt);
9348     if (nsNr != 0)
9349 	nsPop(ctxt, nsNr);
9350     return;
9351 }
9352 
9353 /**
9354  * xmlParseCDSect:
9355  * @ctxt:  an XML parser context
9356  *
9357  * Parse escaped pure raw content.
9358  *
9359  * [18] CDSect ::= CDStart CData CDEnd
9360  *
9361  * [19] CDStart ::= '<![CDATA['
9362  *
9363  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9364  *
9365  * [21] CDEnd ::= ']]>'
9366  */
9367 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9368 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9369     xmlChar *buf = NULL;
9370     int len = 0;
9371     int size = XML_PARSER_BUFFER_SIZE;
9372     int r, rl;
9373     int	s, sl;
9374     int cur, l;
9375     int count = 0;
9376 
9377     /* Check 2.6.0 was NXT(0) not RAW */
9378     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9379 	SKIP(9);
9380     } else
9381         return;
9382 
9383     ctxt->instate = XML_PARSER_CDATA_SECTION;
9384     r = CUR_CHAR(rl);
9385     if (!IS_CHAR(r)) {
9386 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9387 	ctxt->instate = XML_PARSER_CONTENT;
9388         return;
9389     }
9390     NEXTL(rl);
9391     s = CUR_CHAR(sl);
9392     if (!IS_CHAR(s)) {
9393 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9394 	ctxt->instate = XML_PARSER_CONTENT;
9395         return;
9396     }
9397     NEXTL(sl);
9398     cur = CUR_CHAR(l);
9399     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9400     if (buf == NULL) {
9401 	xmlErrMemory(ctxt, NULL);
9402 	return;
9403     }
9404     while (IS_CHAR(cur) &&
9405            ((r != ']') || (s != ']') || (cur != '>'))) {
9406 	if (len + 5 >= size) {
9407 	    xmlChar *tmp;
9408 
9409 	    size *= 2;
9410 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9411 	    if (tmp == NULL) {
9412 	        xmlFree(buf);
9413 		xmlErrMemory(ctxt, NULL);
9414 		return;
9415 	    }
9416 	    buf = tmp;
9417 	}
9418 	COPY_BUF(rl,buf,len,r);
9419 	r = s;
9420 	rl = sl;
9421 	s = cur;
9422 	sl = l;
9423 	count++;
9424 	if (count > 50) {
9425 	    GROW;
9426 	    count = 0;
9427 	}
9428 	NEXTL(l);
9429 	cur = CUR_CHAR(l);
9430     }
9431     buf[len] = 0;
9432     ctxt->instate = XML_PARSER_CONTENT;
9433     if (cur != '>') {
9434 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9435 	                     "CData section not finished\n%.50s\n", buf);
9436 	xmlFree(buf);
9437         return;
9438     }
9439     NEXTL(l);
9440 
9441     /*
9442      * OK the buffer is to be consumed as cdata.
9443      */
9444     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9445 	if (ctxt->sax->cdataBlock != NULL)
9446 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9447 	else if (ctxt->sax->characters != NULL)
9448 	    ctxt->sax->characters(ctxt->userData, buf, len);
9449     }
9450     xmlFree(buf);
9451 }
9452 
9453 /**
9454  * xmlParseContent:
9455  * @ctxt:  an XML parser context
9456  *
9457  * Parse a content:
9458  *
9459  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9460  */
9461 
9462 void
xmlParseContent(xmlParserCtxtPtr ctxt)9463 xmlParseContent(xmlParserCtxtPtr ctxt) {
9464     GROW;
9465     while ((RAW != 0) &&
9466 	   ((RAW != '<') || (NXT(1) != '/')) &&
9467 	   (ctxt->instate != XML_PARSER_EOF)) {
9468 	const xmlChar *test = CUR_PTR;
9469 	unsigned int cons = ctxt->input->consumed;
9470 	const xmlChar *cur = ctxt->input->cur;
9471 
9472 	/*
9473 	 * First case : a Processing Instruction.
9474 	 */
9475 	if ((*cur == '<') && (cur[1] == '?')) {
9476 	    xmlParsePI(ctxt);
9477 	}
9478 
9479 	/*
9480 	 * Second case : a CDSection
9481 	 */
9482 	/* 2.6.0 test was *cur not RAW */
9483 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9484 	    xmlParseCDSect(ctxt);
9485 	}
9486 
9487 	/*
9488 	 * Third case :  a comment
9489 	 */
9490 	else if ((*cur == '<') && (NXT(1) == '!') &&
9491 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9492 	    xmlParseComment(ctxt);
9493 	    ctxt->instate = XML_PARSER_CONTENT;
9494 	}
9495 
9496 	/*
9497 	 * Fourth case :  a sub-element.
9498 	 */
9499 	else if (*cur == '<') {
9500 	    xmlParseElement(ctxt);
9501 	}
9502 
9503 	/*
9504 	 * Fifth case : a reference. If if has not been resolved,
9505 	 *    parsing returns it's Name, create the node
9506 	 */
9507 
9508 	else if (*cur == '&') {
9509 	    xmlParseReference(ctxt);
9510 	}
9511 
9512 	/*
9513 	 * Last case, text. Note that References are handled directly.
9514 	 */
9515 	else {
9516 	    xmlParseCharData(ctxt, 0);
9517 	}
9518 
9519 	GROW;
9520 	/*
9521 	 * Pop-up of finished entities.
9522 	 */
9523 	while ((RAW == 0) && (ctxt->inputNr > 1))
9524 	    xmlPopInput(ctxt);
9525 	SHRINK;
9526 
9527 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9528 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9529 	                "detected an error in element content\n");
9530 	    ctxt->instate = XML_PARSER_EOF;
9531             break;
9532 	}
9533     }
9534 }
9535 
9536 /**
9537  * xmlParseElement:
9538  * @ctxt:  an XML parser context
9539  *
9540  * parse an XML element, this is highly recursive
9541  *
9542  * [39] element ::= EmptyElemTag | STag content ETag
9543  *
9544  * [ WFC: Element Type Match ]
9545  * The Name in an element's end-tag must match the element type in the
9546  * start-tag.
9547  *
9548  */
9549 
9550 void
xmlParseElement(xmlParserCtxtPtr ctxt)9551 xmlParseElement(xmlParserCtxtPtr ctxt) {
9552     const xmlChar *name;
9553     const xmlChar *prefix = NULL;
9554     const xmlChar *URI = NULL;
9555     xmlParserNodeInfo node_info;
9556     int line, tlen;
9557     xmlNodePtr ret;
9558     int nsNr = ctxt->nsNr;
9559 
9560     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9561         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9562 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9563 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9564 			  xmlParserMaxDepth);
9565 	ctxt->instate = XML_PARSER_EOF;
9566 	return;
9567     }
9568 
9569     /* Capture start position */
9570     if (ctxt->record_info) {
9571         node_info.begin_pos = ctxt->input->consumed +
9572                           (CUR_PTR - ctxt->input->base);
9573 	node_info.begin_line = ctxt->input->line;
9574     }
9575 
9576     if (ctxt->spaceNr == 0)
9577 	spacePush(ctxt, -1);
9578     else if (*ctxt->space == -2)
9579 	spacePush(ctxt, -1);
9580     else
9581 	spacePush(ctxt, *ctxt->space);
9582 
9583     line = ctxt->input->line;
9584 #ifdef LIBXML_SAX1_ENABLED
9585     if (ctxt->sax2)
9586 #endif /* LIBXML_SAX1_ENABLED */
9587         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9588 #ifdef LIBXML_SAX1_ENABLED
9589     else
9590 	name = xmlParseStartTag(ctxt);
9591 #endif /* LIBXML_SAX1_ENABLED */
9592     if (name == NULL) {
9593 	spacePop(ctxt);
9594         return;
9595     }
9596     namePush(ctxt, name);
9597     ret = ctxt->node;
9598 
9599 #ifdef LIBXML_VALID_ENABLED
9600     /*
9601      * [ VC: Root Element Type ]
9602      * The Name in the document type declaration must match the element
9603      * type of the root element.
9604      */
9605     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9606         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9607         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9608 #endif /* LIBXML_VALID_ENABLED */
9609 
9610     /*
9611      * Check for an Empty Element.
9612      */
9613     if ((RAW == '/') && (NXT(1) == '>')) {
9614         SKIP(2);
9615 	if (ctxt->sax2) {
9616 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9617 		(!ctxt->disableSAX))
9618 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9619 #ifdef LIBXML_SAX1_ENABLED
9620 	} else {
9621 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9622 		(!ctxt->disableSAX))
9623 		ctxt->sax->endElement(ctxt->userData, name);
9624 #endif /* LIBXML_SAX1_ENABLED */
9625 	}
9626 	namePop(ctxt);
9627 	spacePop(ctxt);
9628 	if (nsNr != ctxt->nsNr)
9629 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9630 	if ( ret != NULL && ctxt->record_info ) {
9631 	   node_info.end_pos = ctxt->input->consumed +
9632 			      (CUR_PTR - ctxt->input->base);
9633 	   node_info.end_line = ctxt->input->line;
9634 	   node_info.node = ret;
9635 	   xmlParserAddNodeInfo(ctxt, &node_info);
9636 	}
9637 	return;
9638     }
9639     if (RAW == '>') {
9640         NEXT1;
9641     } else {
9642         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9643 		     "Couldn't find end of Start Tag %s line %d\n",
9644 		                name, line, NULL);
9645 
9646 	/*
9647 	 * end of parsing of this node.
9648 	 */
9649 	nodePop(ctxt);
9650 	namePop(ctxt);
9651 	spacePop(ctxt);
9652 	if (nsNr != ctxt->nsNr)
9653 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9654 
9655 	/*
9656 	 * Capture end position and add node
9657 	 */
9658 	if ( ret != NULL && ctxt->record_info ) {
9659 	   node_info.end_pos = ctxt->input->consumed +
9660 			      (CUR_PTR - ctxt->input->base);
9661 	   node_info.end_line = ctxt->input->line;
9662 	   node_info.node = ret;
9663 	   xmlParserAddNodeInfo(ctxt, &node_info);
9664 	}
9665 	return;
9666     }
9667 
9668     /*
9669      * Parse the content of the element:
9670      */
9671     xmlParseContent(ctxt);
9672     if (!IS_BYTE_CHAR(RAW)) {
9673         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9674 	 "Premature end of data in tag %s line %d\n",
9675 		                name, line, NULL);
9676 
9677 	/*
9678 	 * end of parsing of this node.
9679 	 */
9680 	nodePop(ctxt);
9681 	namePop(ctxt);
9682 	spacePop(ctxt);
9683 	if (nsNr != ctxt->nsNr)
9684 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9685 	return;
9686     }
9687 
9688     /*
9689      * parse the end of tag: '</' should be here.
9690      */
9691     if (ctxt->sax2) {
9692 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9693 	namePop(ctxt);
9694     }
9695 #ifdef LIBXML_SAX1_ENABLED
9696       else
9697 	xmlParseEndTag1(ctxt, line);
9698 #endif /* LIBXML_SAX1_ENABLED */
9699 
9700     /*
9701      * Capture end position and add node
9702      */
9703     if ( ret != NULL && ctxt->record_info ) {
9704        node_info.end_pos = ctxt->input->consumed +
9705                           (CUR_PTR - ctxt->input->base);
9706        node_info.end_line = ctxt->input->line;
9707        node_info.node = ret;
9708        xmlParserAddNodeInfo(ctxt, &node_info);
9709     }
9710 }
9711 
9712 /**
9713  * xmlParseVersionNum:
9714  * @ctxt:  an XML parser context
9715  *
9716  * parse the XML version value.
9717  *
9718  * [26] VersionNum ::= '1.' [0-9]+
9719  *
9720  * In practice allow [0-9].[0-9]+ at that level
9721  *
9722  * Returns the string giving the XML version number, or NULL
9723  */
9724 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9725 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9726     xmlChar *buf = NULL;
9727     int len = 0;
9728     int size = 10;
9729     xmlChar cur;
9730 
9731     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9732     if (buf == NULL) {
9733 	xmlErrMemory(ctxt, NULL);
9734 	return(NULL);
9735     }
9736     cur = CUR;
9737     if (!((cur >= '0') && (cur <= '9'))) {
9738 	xmlFree(buf);
9739 	return(NULL);
9740     }
9741     buf[len++] = cur;
9742     NEXT;
9743     cur=CUR;
9744     if (cur != '.') {
9745 	xmlFree(buf);
9746 	return(NULL);
9747     }
9748     buf[len++] = cur;
9749     NEXT;
9750     cur=CUR;
9751     while ((cur >= '0') && (cur <= '9')) {
9752 	if (len + 1 >= size) {
9753 	    xmlChar *tmp;
9754 
9755 	    size *= 2;
9756 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9757 	    if (tmp == NULL) {
9758 	        xmlFree(buf);
9759 		xmlErrMemory(ctxt, NULL);
9760 		return(NULL);
9761 	    }
9762 	    buf = tmp;
9763 	}
9764 	buf[len++] = cur;
9765 	NEXT;
9766 	cur=CUR;
9767     }
9768     buf[len] = 0;
9769     return(buf);
9770 }
9771 
9772 /**
9773  * xmlParseVersionInfo:
9774  * @ctxt:  an XML parser context
9775  *
9776  * parse the XML version.
9777  *
9778  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9779  *
9780  * [25] Eq ::= S? '=' S?
9781  *
9782  * Returns the version string, e.g. "1.0"
9783  */
9784 
9785 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9786 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9787     xmlChar *version = NULL;
9788 
9789     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9790 	SKIP(7);
9791 	SKIP_BLANKS;
9792 	if (RAW != '=') {
9793 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9794 	    return(NULL);
9795         }
9796 	NEXT;
9797 	SKIP_BLANKS;
9798 	if (RAW == '"') {
9799 	    NEXT;
9800 	    version = xmlParseVersionNum(ctxt);
9801 	    if (RAW != '"') {
9802 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9803 	    } else
9804 	        NEXT;
9805 	} else if (RAW == '\''){
9806 	    NEXT;
9807 	    version = xmlParseVersionNum(ctxt);
9808 	    if (RAW != '\'') {
9809 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9810 	    } else
9811 	        NEXT;
9812 	} else {
9813 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9814 	}
9815     }
9816     return(version);
9817 }
9818 
9819 /**
9820  * xmlParseEncName:
9821  * @ctxt:  an XML parser context
9822  *
9823  * parse the XML encoding name
9824  *
9825  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9826  *
9827  * Returns the encoding name value or NULL
9828  */
9829 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9830 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9831     xmlChar *buf = NULL;
9832     int len = 0;
9833     int size = 10;
9834     xmlChar cur;
9835 
9836     cur = CUR;
9837     if (((cur >= 'a') && (cur <= 'z')) ||
9838         ((cur >= 'A') && (cur <= 'Z'))) {
9839 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9840 	if (buf == NULL) {
9841 	    xmlErrMemory(ctxt, NULL);
9842 	    return(NULL);
9843 	}
9844 
9845 	buf[len++] = cur;
9846 	NEXT;
9847 	cur = CUR;
9848 	while (((cur >= 'a') && (cur <= 'z')) ||
9849 	       ((cur >= 'A') && (cur <= 'Z')) ||
9850 	       ((cur >= '0') && (cur <= '9')) ||
9851 	       (cur == '.') || (cur == '_') ||
9852 	       (cur == '-')) {
9853 	    if (len + 1 >= size) {
9854 	        xmlChar *tmp;
9855 
9856 		size *= 2;
9857 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9858 		if (tmp == NULL) {
9859 		    xmlErrMemory(ctxt, NULL);
9860 		    xmlFree(buf);
9861 		    return(NULL);
9862 		}
9863 		buf = tmp;
9864 	    }
9865 	    buf[len++] = cur;
9866 	    NEXT;
9867 	    cur = CUR;
9868 	    if (cur == 0) {
9869 	        SHRINK;
9870 		GROW;
9871 		cur = CUR;
9872 	    }
9873         }
9874 	buf[len] = 0;
9875     } else {
9876 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9877     }
9878     return(buf);
9879 }
9880 
9881 /**
9882  * xmlParseEncodingDecl:
9883  * @ctxt:  an XML parser context
9884  *
9885  * parse the XML encoding declaration
9886  *
9887  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
9888  *
9889  * this setups the conversion filters.
9890  *
9891  * Returns the encoding value or NULL
9892  */
9893 
9894 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9895 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9896     xmlChar *encoding = NULL;
9897 
9898     SKIP_BLANKS;
9899     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9900 	SKIP(8);
9901 	SKIP_BLANKS;
9902 	if (RAW != '=') {
9903 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9904 	    return(NULL);
9905         }
9906 	NEXT;
9907 	SKIP_BLANKS;
9908 	if (RAW == '"') {
9909 	    NEXT;
9910 	    encoding = xmlParseEncName(ctxt);
9911 	    if (RAW != '"') {
9912 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9913 	    } else
9914 	        NEXT;
9915 	} else if (RAW == '\''){
9916 	    NEXT;
9917 	    encoding = xmlParseEncName(ctxt);
9918 	    if (RAW != '\'') {
9919 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9920 	    } else
9921 	        NEXT;
9922 	} else {
9923 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9924 	}
9925 	/*
9926 	 * UTF-16 encoding stwich has already taken place at this stage,
9927 	 * more over the little-endian/big-endian selection is already done
9928 	 */
9929         if ((encoding != NULL) &&
9930 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9931 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9932 	    /*
9933 	     * If no encoding was passed to the parser, that we are
9934 	     * using UTF-16 and no decoder is present i.e. the
9935 	     * document is apparently UTF-8 compatible, then raise an
9936 	     * encoding mismatch fatal error
9937 	     */
9938 	    if ((ctxt->encoding == NULL) &&
9939 	        (ctxt->input->buf != NULL) &&
9940 	        (ctxt->input->buf->encoder == NULL)) {
9941 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9942 		  "Document labelled UTF-16 but has UTF-8 content\n");
9943 	    }
9944 	    if (ctxt->encoding != NULL)
9945 		xmlFree((xmlChar *) ctxt->encoding);
9946 	    ctxt->encoding = encoding;
9947 	}
9948 	/*
9949 	 * UTF-8 encoding is handled natively
9950 	 */
9951         else if ((encoding != NULL) &&
9952 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9953 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9954 	    if (ctxt->encoding != NULL)
9955 		xmlFree((xmlChar *) ctxt->encoding);
9956 	    ctxt->encoding = encoding;
9957 	}
9958 	else if (encoding != NULL) {
9959 	    xmlCharEncodingHandlerPtr handler;
9960 
9961 	    if (ctxt->input->encoding != NULL)
9962 		xmlFree((xmlChar *) ctxt->input->encoding);
9963 	    ctxt->input->encoding = encoding;
9964 
9965             handler = xmlFindCharEncodingHandler((const char *) encoding);
9966 	    if (handler != NULL) {
9967 		xmlSwitchToEncoding(ctxt, handler);
9968 	    } else {
9969 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9970 			"Unsupported encoding %s\n", encoding);
9971 		return(NULL);
9972 	    }
9973 	}
9974     }
9975     return(encoding);
9976 }
9977 
9978 /**
9979  * xmlParseSDDecl:
9980  * @ctxt:  an XML parser context
9981  *
9982  * parse the XML standalone declaration
9983  *
9984  * [32] SDDecl ::= S 'standalone' Eq
9985  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9986  *
9987  * [ VC: Standalone Document Declaration ]
9988  * TODO The standalone document declaration must have the value "no"
9989  * if any external markup declarations contain declarations of:
9990  *  - attributes with default values, if elements to which these
9991  *    attributes apply appear in the document without specifications
9992  *    of values for these attributes, or
9993  *  - entities (other than amp, lt, gt, apos, quot), if references
9994  *    to those entities appear in the document, or
9995  *  - attributes with values subject to normalization, where the
9996  *    attribute appears in the document with a value which will change
9997  *    as a result of normalization, or
9998  *  - element types with element content, if white space occurs directly
9999  *    within any instance of those types.
10000  *
10001  * Returns:
10002  *   1 if standalone="yes"
10003  *   0 if standalone="no"
10004  *  -2 if standalone attribute is missing or invalid
10005  *	  (A standalone value of -2 means that the XML declaration was found,
10006  *	   but no value was specified for the standalone attribute).
10007  */
10008 
10009 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10010 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10011     int standalone = -2;
10012 
10013     SKIP_BLANKS;
10014     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10015 	SKIP(10);
10016         SKIP_BLANKS;
10017 	if (RAW != '=') {
10018 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10019 	    return(standalone);
10020         }
10021 	NEXT;
10022 	SKIP_BLANKS;
10023         if (RAW == '\''){
10024 	    NEXT;
10025 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10026 	        standalone = 0;
10027                 SKIP(2);
10028 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10029 	               (NXT(2) == 's')) {
10030 	        standalone = 1;
10031 		SKIP(3);
10032             } else {
10033 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10034 	    }
10035 	    if (RAW != '\'') {
10036 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10037 	    } else
10038 	        NEXT;
10039 	} else if (RAW == '"'){
10040 	    NEXT;
10041 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10042 	        standalone = 0;
10043 		SKIP(2);
10044 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10045 	               (NXT(2) == 's')) {
10046 	        standalone = 1;
10047                 SKIP(3);
10048             } else {
10049 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10050 	    }
10051 	    if (RAW != '"') {
10052 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10053 	    } else
10054 	        NEXT;
10055 	} else {
10056 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10057         }
10058     }
10059     return(standalone);
10060 }
10061 
10062 /**
10063  * xmlParseXMLDecl:
10064  * @ctxt:  an XML parser context
10065  *
10066  * parse an XML declaration header
10067  *
10068  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10069  */
10070 
10071 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10072 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10073     xmlChar *version;
10074 
10075     /*
10076      * This value for standalone indicates that the document has an
10077      * XML declaration but it does not have a standalone attribute.
10078      * It will be overwritten later if a standalone attribute is found.
10079      */
10080     ctxt->input->standalone = -2;
10081 
10082     /*
10083      * We know that '<?xml' is here.
10084      */
10085     SKIP(5);
10086 
10087     if (!IS_BLANK_CH(RAW)) {
10088 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10089 	               "Blank needed after '<?xml'\n");
10090     }
10091     SKIP_BLANKS;
10092 
10093     /*
10094      * We must have the VersionInfo here.
10095      */
10096     version = xmlParseVersionInfo(ctxt);
10097     if (version == NULL) {
10098 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10099     } else {
10100 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10101 	    /*
10102 	     * Changed here for XML-1.0 5th edition
10103 	     */
10104 	    if (ctxt->options & XML_PARSE_OLD10) {
10105 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10106 			          "Unsupported version '%s'\n",
10107 			          version);
10108 	    } else {
10109 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10110 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10111 		                  "Unsupported version '%s'\n",
10112 				  version, NULL);
10113 		} else {
10114 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10115 				      "Unsupported version '%s'\n",
10116 				      version);
10117 		}
10118 	    }
10119 	}
10120 	if (ctxt->version != NULL)
10121 	    xmlFree((void *) ctxt->version);
10122 	ctxt->version = version;
10123     }
10124 
10125     /*
10126      * We may have the encoding declaration
10127      */
10128     if (!IS_BLANK_CH(RAW)) {
10129         if ((RAW == '?') && (NXT(1) == '>')) {
10130 	    SKIP(2);
10131 	    return;
10132 	}
10133 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10134     }
10135     xmlParseEncodingDecl(ctxt);
10136     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10137 	/*
10138 	 * The XML REC instructs us to stop parsing right here
10139 	 */
10140         return;
10141     }
10142 
10143     /*
10144      * We may have the standalone status.
10145      */
10146     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10147         if ((RAW == '?') && (NXT(1) == '>')) {
10148 	    SKIP(2);
10149 	    return;
10150 	}
10151 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10152     }
10153 
10154     /*
10155      * We can grow the input buffer freely at that point
10156      */
10157     GROW;
10158 
10159     SKIP_BLANKS;
10160     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10161 
10162     SKIP_BLANKS;
10163     if ((RAW == '?') && (NXT(1) == '>')) {
10164         SKIP(2);
10165     } else if (RAW == '>') {
10166         /* Deprecated old WD ... */
10167 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10168 	NEXT;
10169     } else {
10170 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10171 	MOVETO_ENDTAG(CUR_PTR);
10172 	NEXT;
10173     }
10174 }
10175 
10176 /**
10177  * xmlParseMisc:
10178  * @ctxt:  an XML parser context
10179  *
10180  * parse an XML Misc* optional field.
10181  *
10182  * [27] Misc ::= Comment | PI |  S
10183  */
10184 
10185 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10186 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10187     while (((RAW == '<') && (NXT(1) == '?')) ||
10188            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10189            IS_BLANK_CH(CUR)) {
10190         if ((RAW == '<') && (NXT(1) == '?')) {
10191 	    xmlParsePI(ctxt);
10192 	} else if (IS_BLANK_CH(CUR)) {
10193 	    NEXT;
10194 	} else
10195 	    xmlParseComment(ctxt);
10196     }
10197 }
10198 
10199 /**
10200  * xmlParseDocument:
10201  * @ctxt:  an XML parser context
10202  *
10203  * parse an XML document (and build a tree if using the standard SAX
10204  * interface).
10205  *
10206  * [1] document ::= prolog element Misc*
10207  *
10208  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10209  *
10210  * Returns 0, -1 in case of error. the parser context is augmented
10211  *                as a result of the parsing.
10212  */
10213 
10214 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10215 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10216     xmlChar start[4];
10217     xmlCharEncoding enc;
10218 
10219     xmlInitParser();
10220 
10221     if ((ctxt == NULL) || (ctxt->input == NULL))
10222         return(-1);
10223 
10224     GROW;
10225 
10226     /*
10227      * SAX: detecting the level.
10228      */
10229     xmlDetectSAX2(ctxt);
10230 
10231     /*
10232      * SAX: beginning of the document processing.
10233      */
10234     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10235         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10236 
10237     if ((ctxt->encoding == NULL) &&
10238         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10239 	/*
10240 	 * Get the 4 first bytes and decode the charset
10241 	 * if enc != XML_CHAR_ENCODING_NONE
10242 	 * plug some encoding conversion routines.
10243 	 */
10244 	start[0] = RAW;
10245 	start[1] = NXT(1);
10246 	start[2] = NXT(2);
10247 	start[3] = NXT(3);
10248 	enc = xmlDetectCharEncoding(&start[0], 4);
10249 	if (enc != XML_CHAR_ENCODING_NONE) {
10250 	    xmlSwitchEncoding(ctxt, enc);
10251 	}
10252     }
10253 
10254 
10255     if (CUR == 0) {
10256 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10257     }
10258 
10259     /*
10260      * Check for the XMLDecl in the Prolog.
10261      * do not GROW here to avoid the detected encoder to decode more
10262      * than just the first line, unless the amount of data is really
10263      * too small to hold "<?xml version="1.0" encoding="foo"
10264      */
10265     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10266        GROW;
10267     }
10268     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10269 
10270 	/*
10271 	 * Note that we will switch encoding on the fly.
10272 	 */
10273 	xmlParseXMLDecl(ctxt);
10274 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10275 	    /*
10276 	     * The XML REC instructs us to stop parsing right here
10277 	     */
10278 	    return(-1);
10279 	}
10280 	ctxt->standalone = ctxt->input->standalone;
10281 	SKIP_BLANKS;
10282     } else {
10283 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10284     }
10285     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10286         ctxt->sax->startDocument(ctxt->userData);
10287 
10288     /*
10289      * The Misc part of the Prolog
10290      */
10291     GROW;
10292     xmlParseMisc(ctxt);
10293 
10294     /*
10295      * Then possibly doc type declaration(s) and more Misc
10296      * (doctypedecl Misc*)?
10297      */
10298     GROW;
10299     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10300 
10301 	ctxt->inSubset = 1;
10302 	xmlParseDocTypeDecl(ctxt);
10303 	if (RAW == '[') {
10304 	    ctxt->instate = XML_PARSER_DTD;
10305 	    xmlParseInternalSubset(ctxt);
10306 	}
10307 
10308 	/*
10309 	 * Create and update the external subset.
10310 	 */
10311 	ctxt->inSubset = 2;
10312 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10313 	    (!ctxt->disableSAX))
10314 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10315 	                              ctxt->extSubSystem, ctxt->extSubURI);
10316 	ctxt->inSubset = 0;
10317 
10318         xmlCleanSpecialAttr(ctxt);
10319 
10320 	ctxt->instate = XML_PARSER_PROLOG;
10321 	xmlParseMisc(ctxt);
10322     }
10323 
10324     /*
10325      * Time to start parsing the tree itself
10326      */
10327     GROW;
10328     if (RAW != '<') {
10329 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10330 		       "Start tag expected, '<' not found\n");
10331     } else {
10332 	ctxt->instate = XML_PARSER_CONTENT;
10333 	xmlParseElement(ctxt);
10334 	ctxt->instate = XML_PARSER_EPILOG;
10335 
10336 
10337 	/*
10338 	 * The Misc part at the end
10339 	 */
10340 	xmlParseMisc(ctxt);
10341 
10342 	if (RAW != 0) {
10343 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10344 	}
10345 	ctxt->instate = XML_PARSER_EOF;
10346     }
10347 
10348     /*
10349      * SAX: end of the document processing.
10350      */
10351     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10352         ctxt->sax->endDocument(ctxt->userData);
10353 
10354     /*
10355      * Remove locally kept entity definitions if the tree was not built
10356      */
10357     if ((ctxt->myDoc != NULL) &&
10358 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10359 	xmlFreeDoc(ctxt->myDoc);
10360 	ctxt->myDoc = NULL;
10361     }
10362 
10363     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10364         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10365 	if (ctxt->valid)
10366 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10367 	if (ctxt->nsWellFormed)
10368 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10369 	if (ctxt->options & XML_PARSE_OLD10)
10370 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10371     }
10372     if (! ctxt->wellFormed) {
10373 	ctxt->valid = 0;
10374 	return(-1);
10375     }
10376     return(0);
10377 }
10378 
10379 /**
10380  * xmlParseExtParsedEnt:
10381  * @ctxt:  an XML parser context
10382  *
10383  * parse a general parsed entity
10384  * An external general parsed entity is well-formed if it matches the
10385  * production labeled extParsedEnt.
10386  *
10387  * [78] extParsedEnt ::= TextDecl? content
10388  *
10389  * Returns 0, -1 in case of error. the parser context is augmented
10390  *                as a result of the parsing.
10391  */
10392 
10393 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10394 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10395     xmlChar start[4];
10396     xmlCharEncoding enc;
10397 
10398     if ((ctxt == NULL) || (ctxt->input == NULL))
10399         return(-1);
10400 
10401     xmlDefaultSAXHandlerInit();
10402 
10403     xmlDetectSAX2(ctxt);
10404 
10405     GROW;
10406 
10407     /*
10408      * SAX: beginning of the document processing.
10409      */
10410     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10411         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10412 
10413     /*
10414      * Get the 4 first bytes and decode the charset
10415      * if enc != XML_CHAR_ENCODING_NONE
10416      * plug some encoding conversion routines.
10417      */
10418     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10419 	start[0] = RAW;
10420 	start[1] = NXT(1);
10421 	start[2] = NXT(2);
10422 	start[3] = NXT(3);
10423 	enc = xmlDetectCharEncoding(start, 4);
10424 	if (enc != XML_CHAR_ENCODING_NONE) {
10425 	    xmlSwitchEncoding(ctxt, enc);
10426 	}
10427     }
10428 
10429 
10430     if (CUR == 0) {
10431 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10432     }
10433 
10434     /*
10435      * Check for the XMLDecl in the Prolog.
10436      */
10437     GROW;
10438     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10439 
10440 	/*
10441 	 * Note that we will switch encoding on the fly.
10442 	 */
10443 	xmlParseXMLDecl(ctxt);
10444 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10445 	    /*
10446 	     * The XML REC instructs us to stop parsing right here
10447 	     */
10448 	    return(-1);
10449 	}
10450 	SKIP_BLANKS;
10451     } else {
10452 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10453     }
10454     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10455         ctxt->sax->startDocument(ctxt->userData);
10456 
10457     /*
10458      * Doing validity checking on chunk doesn't make sense
10459      */
10460     ctxt->instate = XML_PARSER_CONTENT;
10461     ctxt->validate = 0;
10462     ctxt->loadsubset = 0;
10463     ctxt->depth = 0;
10464 
10465     xmlParseContent(ctxt);
10466 
10467     if ((RAW == '<') && (NXT(1) == '/')) {
10468 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10469     } else if (RAW != 0) {
10470 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10471     }
10472 
10473     /*
10474      * SAX: end of the document processing.
10475      */
10476     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10477         ctxt->sax->endDocument(ctxt->userData);
10478 
10479     if (! ctxt->wellFormed) return(-1);
10480     return(0);
10481 }
10482 
10483 #ifdef LIBXML_PUSH_ENABLED
10484 /************************************************************************
10485  *									*
10486  * 		Progressive parsing interfaces				*
10487  *									*
10488  ************************************************************************/
10489 
10490 /**
10491  * xmlParseLookupSequence:
10492  * @ctxt:  an XML parser context
10493  * @first:  the first char to lookup
10494  * @next:  the next char to lookup or zero
10495  * @third:  the next char to lookup or zero
10496  *
10497  * Try to find if a sequence (first, next, third) or  just (first next) or
10498  * (first) is available in the input stream.
10499  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10500  * to avoid rescanning sequences of bytes, it DOES change the state of the
10501  * parser, do not use liberally.
10502  *
10503  * Returns the index to the current parsing point if the full sequence
10504  *      is available, -1 otherwise.
10505  */
10506 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10507 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10508                        xmlChar next, xmlChar third) {
10509     int base, len;
10510     xmlParserInputPtr in;
10511     const xmlChar *buf;
10512 
10513     in = ctxt->input;
10514     if (in == NULL) return(-1);
10515     base = in->cur - in->base;
10516     if (base < 0) return(-1);
10517     if (ctxt->checkIndex > base)
10518         base = ctxt->checkIndex;
10519     if (in->buf == NULL) {
10520 	buf = in->base;
10521 	len = in->length;
10522     } else {
10523 	buf = in->buf->buffer->content;
10524 	len = in->buf->buffer->use;
10525     }
10526     /* take into account the sequence length */
10527     if (third) len -= 2;
10528     else if (next) len --;
10529     for (;base < len;base++) {
10530         if (buf[base] == first) {
10531 	    if (third != 0) {
10532 		if ((buf[base + 1] != next) ||
10533 		    (buf[base + 2] != third)) continue;
10534 	    } else if (next != 0) {
10535 		if (buf[base + 1] != next) continue;
10536 	    }
10537 	    ctxt->checkIndex = 0;
10538 #ifdef DEBUG_PUSH
10539 	    if (next == 0)
10540 		xmlGenericError(xmlGenericErrorContext,
10541 			"PP: lookup '%c' found at %d\n",
10542 			first, base);
10543 	    else if (third == 0)
10544 		xmlGenericError(xmlGenericErrorContext,
10545 			"PP: lookup '%c%c' found at %d\n",
10546 			first, next, base);
10547 	    else
10548 		xmlGenericError(xmlGenericErrorContext,
10549 			"PP: lookup '%c%c%c' found at %d\n",
10550 			first, next, third, base);
10551 #endif
10552 	    return(base - (in->cur - in->base));
10553 	}
10554     }
10555     ctxt->checkIndex = base;
10556 #ifdef DEBUG_PUSH
10557     if (next == 0)
10558 	xmlGenericError(xmlGenericErrorContext,
10559 		"PP: lookup '%c' failed\n", first);
10560     else if (third == 0)
10561 	xmlGenericError(xmlGenericErrorContext,
10562 		"PP: lookup '%c%c' failed\n", first, next);
10563     else
10564 	xmlGenericError(xmlGenericErrorContext,
10565 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10566 #endif
10567     return(-1);
10568 }
10569 
10570 /**
10571  * xmlParseGetLasts:
10572  * @ctxt:  an XML parser context
10573  * @lastlt:  pointer to store the last '<' from the input
10574  * @lastgt:  pointer to store the last '>' from the input
10575  *
10576  * Lookup the last < and > in the current chunk
10577  */
10578 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10579 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10580                  const xmlChar **lastgt) {
10581     const xmlChar *tmp;
10582 
10583     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10584 	xmlGenericError(xmlGenericErrorContext,
10585 		    "Internal error: xmlParseGetLasts\n");
10586 	return;
10587     }
10588     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10589         tmp = ctxt->input->end;
10590 	tmp--;
10591 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10592 	if (tmp < ctxt->input->base) {
10593 	    *lastlt = NULL;
10594 	    *lastgt = NULL;
10595 	} else {
10596 	    *lastlt = tmp;
10597 	    tmp++;
10598 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10599 	        if (*tmp == '\'') {
10600 		    tmp++;
10601 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10602 		    if (tmp < ctxt->input->end) tmp++;
10603 		} else if (*tmp == '"') {
10604 		    tmp++;
10605 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10606 		    if (tmp < ctxt->input->end) tmp++;
10607 		} else
10608 		    tmp++;
10609 	    }
10610 	    if (tmp < ctxt->input->end)
10611 	        *lastgt = tmp;
10612 	    else {
10613 	        tmp = *lastlt;
10614 		tmp--;
10615 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10616 		if (tmp >= ctxt->input->base)
10617 		    *lastgt = tmp;
10618 		else
10619 		    *lastgt = NULL;
10620 	    }
10621 	}
10622     } else {
10623         *lastlt = NULL;
10624 	*lastgt = NULL;
10625     }
10626 }
10627 /**
10628  * xmlCheckCdataPush:
10629  * @cur: pointer to the bock of characters
10630  * @len: length of the block in bytes
10631  *
10632  * Check that the block of characters is okay as SCdata content [20]
10633  *
10634  * Returns the number of bytes to pass if okay, a negative index where an
10635  *         UTF-8 error occured otherwise
10636  */
10637 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10638 xmlCheckCdataPush(const xmlChar *utf, int len) {
10639     int ix;
10640     unsigned char c;
10641     int codepoint;
10642 
10643     if ((utf == NULL) || (len <= 0))
10644         return(0);
10645 
10646     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10647         c = utf[ix];
10648         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10649 	    if (c >= 0x20)
10650 		ix++;
10651 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10652 	        ix++;
10653 	    else
10654 	        return(-ix);
10655 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10656 	    if (ix + 2 > len) return(ix);
10657 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10658 	        return(-ix);
10659 	    codepoint = (utf[ix] & 0x1f) << 6;
10660 	    codepoint |= utf[ix+1] & 0x3f;
10661 	    if (!xmlIsCharQ(codepoint))
10662 	        return(-ix);
10663 	    ix += 2;
10664 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10665 	    if (ix + 3 > len) return(ix);
10666 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10667 	        ((utf[ix+2] & 0xc0) != 0x80))
10668 		    return(-ix);
10669 	    codepoint = (utf[ix] & 0xf) << 12;
10670 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
10671 	    codepoint |= utf[ix+2] & 0x3f;
10672 	    if (!xmlIsCharQ(codepoint))
10673 	        return(-ix);
10674 	    ix += 3;
10675 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10676 	    if (ix + 4 > len) return(ix);
10677 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10678 	        ((utf[ix+2] & 0xc0) != 0x80) ||
10679 		((utf[ix+3] & 0xc0) != 0x80))
10680 		    return(-ix);
10681 	    codepoint = (utf[ix] & 0x7) << 18;
10682 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
10683 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
10684 	    codepoint |= utf[ix+3] & 0x3f;
10685 	    if (!xmlIsCharQ(codepoint))
10686 	        return(-ix);
10687 	    ix += 4;
10688 	} else				/* unknown encoding */
10689 	    return(-ix);
10690       }
10691       return(ix);
10692 }
10693 
10694 /**
10695  * xmlParseTryOrFinish:
10696  * @ctxt:  an XML parser context
10697  * @terminate:  last chunk indicator
10698  *
10699  * Try to progress on parsing
10700  *
10701  * Returns zero if no parsing was possible
10702  */
10703 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10704 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10705     int ret = 0;
10706     int avail, tlen;
10707     xmlChar cur, next;
10708     const xmlChar *lastlt, *lastgt;
10709 
10710     if (ctxt->input == NULL)
10711         return(0);
10712 
10713 #ifdef DEBUG_PUSH
10714     switch (ctxt->instate) {
10715 	case XML_PARSER_EOF:
10716 	    xmlGenericError(xmlGenericErrorContext,
10717 		    "PP: try EOF\n"); break;
10718 	case XML_PARSER_START:
10719 	    xmlGenericError(xmlGenericErrorContext,
10720 		    "PP: try START\n"); break;
10721 	case XML_PARSER_MISC:
10722 	    xmlGenericError(xmlGenericErrorContext,
10723 		    "PP: try MISC\n");break;
10724 	case XML_PARSER_COMMENT:
10725 	    xmlGenericError(xmlGenericErrorContext,
10726 		    "PP: try COMMENT\n");break;
10727 	case XML_PARSER_PROLOG:
10728 	    xmlGenericError(xmlGenericErrorContext,
10729 		    "PP: try PROLOG\n");break;
10730 	case XML_PARSER_START_TAG:
10731 	    xmlGenericError(xmlGenericErrorContext,
10732 		    "PP: try START_TAG\n");break;
10733 	case XML_PARSER_CONTENT:
10734 	    xmlGenericError(xmlGenericErrorContext,
10735 		    "PP: try CONTENT\n");break;
10736 	case XML_PARSER_CDATA_SECTION:
10737 	    xmlGenericError(xmlGenericErrorContext,
10738 		    "PP: try CDATA_SECTION\n");break;
10739 	case XML_PARSER_END_TAG:
10740 	    xmlGenericError(xmlGenericErrorContext,
10741 		    "PP: try END_TAG\n");break;
10742 	case XML_PARSER_ENTITY_DECL:
10743 	    xmlGenericError(xmlGenericErrorContext,
10744 		    "PP: try ENTITY_DECL\n");break;
10745 	case XML_PARSER_ENTITY_VALUE:
10746 	    xmlGenericError(xmlGenericErrorContext,
10747 		    "PP: try ENTITY_VALUE\n");break;
10748 	case XML_PARSER_ATTRIBUTE_VALUE:
10749 	    xmlGenericError(xmlGenericErrorContext,
10750 		    "PP: try ATTRIBUTE_VALUE\n");break;
10751 	case XML_PARSER_DTD:
10752 	    xmlGenericError(xmlGenericErrorContext,
10753 		    "PP: try DTD\n");break;
10754 	case XML_PARSER_EPILOG:
10755 	    xmlGenericError(xmlGenericErrorContext,
10756 		    "PP: try EPILOG\n");break;
10757 	case XML_PARSER_PI:
10758 	    xmlGenericError(xmlGenericErrorContext,
10759 		    "PP: try PI\n");break;
10760         case XML_PARSER_IGNORE:
10761             xmlGenericError(xmlGenericErrorContext,
10762 		    "PP: try IGNORE\n");break;
10763     }
10764 #endif
10765 
10766     if ((ctxt->input != NULL) &&
10767         (ctxt->input->cur - ctxt->input->base > 4096)) {
10768 	xmlSHRINK(ctxt);
10769 	ctxt->checkIndex = 0;
10770     }
10771     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10772 
10773     while (1) {
10774 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10775 	    return(0);
10776 
10777 
10778 	/*
10779 	 * Pop-up of finished entities.
10780 	 */
10781 	while ((RAW == 0) && (ctxt->inputNr > 1))
10782 	    xmlPopInput(ctxt);
10783 
10784 	if (ctxt->input == NULL) break;
10785 	if (ctxt->input->buf == NULL)
10786 	    avail = ctxt->input->length -
10787 	            (ctxt->input->cur - ctxt->input->base);
10788 	else {
10789 	    /*
10790 	     * If we are operating on converted input, try to flush
10791 	     * remaining chars to avoid them stalling in the non-converted
10792 	     * buffer.
10793 	     */
10794 	    if ((ctxt->input->buf->raw != NULL) &&
10795 		(ctxt->input->buf->raw->use > 0)) {
10796 		int base = ctxt->input->base -
10797 		           ctxt->input->buf->buffer->content;
10798 		int current = ctxt->input->cur - ctxt->input->base;
10799 
10800 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10801 		ctxt->input->base = ctxt->input->buf->buffer->content + base;
10802 		ctxt->input->cur = ctxt->input->base + current;
10803 		ctxt->input->end =
10804 		    &ctxt->input->buf->buffer->content[
10805 		                       ctxt->input->buf->buffer->use];
10806 	    }
10807 	    avail = ctxt->input->buf->buffer->use -
10808 		    (ctxt->input->cur - ctxt->input->base);
10809 	}
10810         if (avail < 1)
10811 	    goto done;
10812         switch (ctxt->instate) {
10813             case XML_PARSER_EOF:
10814 	        /*
10815 		 * Document parsing is done !
10816 		 */
10817 	        goto done;
10818             case XML_PARSER_START:
10819 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10820 		    xmlChar start[4];
10821 		    xmlCharEncoding enc;
10822 
10823 		    /*
10824 		     * Very first chars read from the document flow.
10825 		     */
10826 		    if (avail < 4)
10827 			goto done;
10828 
10829 		    /*
10830 		     * Get the 4 first bytes and decode the charset
10831 		     * if enc != XML_CHAR_ENCODING_NONE
10832 		     * plug some encoding conversion routines,
10833 		     * else xmlSwitchEncoding will set to (default)
10834 		     * UTF8.
10835 		     */
10836 		    start[0] = RAW;
10837 		    start[1] = NXT(1);
10838 		    start[2] = NXT(2);
10839 		    start[3] = NXT(3);
10840 		    enc = xmlDetectCharEncoding(start, 4);
10841 		    xmlSwitchEncoding(ctxt, enc);
10842 		    break;
10843 		}
10844 
10845 		if (avail < 2)
10846 		    goto done;
10847 		cur = ctxt->input->cur[0];
10848 		next = ctxt->input->cur[1];
10849 		if (cur == 0) {
10850 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10851 			ctxt->sax->setDocumentLocator(ctxt->userData,
10852 						      &xmlDefaultSAXLocator);
10853 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854 		    ctxt->instate = XML_PARSER_EOF;
10855 #ifdef DEBUG_PUSH
10856 		    xmlGenericError(xmlGenericErrorContext,
10857 			    "PP: entering EOF\n");
10858 #endif
10859 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10860 			ctxt->sax->endDocument(ctxt->userData);
10861 		    goto done;
10862 		}
10863 	        if ((cur == '<') && (next == '?')) {
10864 		    /* PI or XML decl */
10865 		    if (avail < 5) return(ret);
10866 		    if ((!terminate) &&
10867 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10868 			return(ret);
10869 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10870 			ctxt->sax->setDocumentLocator(ctxt->userData,
10871 						      &xmlDefaultSAXLocator);
10872 		    if ((ctxt->input->cur[2] == 'x') &&
10873 			(ctxt->input->cur[3] == 'm') &&
10874 			(ctxt->input->cur[4] == 'l') &&
10875 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
10876 			ret += 5;
10877 #ifdef DEBUG_PUSH
10878 			xmlGenericError(xmlGenericErrorContext,
10879 				"PP: Parsing XML Decl\n");
10880 #endif
10881 			xmlParseXMLDecl(ctxt);
10882 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10883 			    /*
10884 			     * The XML REC instructs us to stop parsing right
10885 			     * here
10886 			     */
10887 			    ctxt->instate = XML_PARSER_EOF;
10888 			    return(0);
10889 			}
10890 			ctxt->standalone = ctxt->input->standalone;
10891 			if ((ctxt->encoding == NULL) &&
10892 			    (ctxt->input->encoding != NULL))
10893 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10894 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10895 			    (!ctxt->disableSAX))
10896 			    ctxt->sax->startDocument(ctxt->userData);
10897 			ctxt->instate = XML_PARSER_MISC;
10898 #ifdef DEBUG_PUSH
10899 			xmlGenericError(xmlGenericErrorContext,
10900 				"PP: entering MISC\n");
10901 #endif
10902 		    } else {
10903 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10904 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10905 			    (!ctxt->disableSAX))
10906 			    ctxt->sax->startDocument(ctxt->userData);
10907 			ctxt->instate = XML_PARSER_MISC;
10908 #ifdef DEBUG_PUSH
10909 			xmlGenericError(xmlGenericErrorContext,
10910 				"PP: entering MISC\n");
10911 #endif
10912 		    }
10913 		} else {
10914 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10915 			ctxt->sax->setDocumentLocator(ctxt->userData,
10916 						      &xmlDefaultSAXLocator);
10917 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10918 		    if (ctxt->version == NULL) {
10919 		        xmlErrMemory(ctxt, NULL);
10920 			break;
10921 		    }
10922 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10923 		        (!ctxt->disableSAX))
10924 			ctxt->sax->startDocument(ctxt->userData);
10925 		    ctxt->instate = XML_PARSER_MISC;
10926 #ifdef DEBUG_PUSH
10927 		    xmlGenericError(xmlGenericErrorContext,
10928 			    "PP: entering MISC\n");
10929 #endif
10930 		}
10931 		break;
10932             case XML_PARSER_START_TAG: {
10933 	        const xmlChar *name;
10934 		const xmlChar *prefix = NULL;
10935 		const xmlChar *URI = NULL;
10936 		int nsNr = ctxt->nsNr;
10937 
10938 		if ((avail < 2) && (ctxt->inputNr == 1))
10939 		    goto done;
10940 		cur = ctxt->input->cur[0];
10941 	        if (cur != '<') {
10942 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10943 		    ctxt->instate = XML_PARSER_EOF;
10944 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10945 			ctxt->sax->endDocument(ctxt->userData);
10946 		    goto done;
10947 		}
10948 		if (!terminate) {
10949 		    if (ctxt->progressive) {
10950 		        /* > can be found unescaped in attribute values */
10951 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10952 			    goto done;
10953 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10954 			goto done;
10955 		    }
10956 		}
10957 		if (ctxt->spaceNr == 0)
10958 		    spacePush(ctxt, -1);
10959 		else if (*ctxt->space == -2)
10960 		    spacePush(ctxt, -1);
10961 		else
10962 		    spacePush(ctxt, *ctxt->space);
10963 #ifdef LIBXML_SAX1_ENABLED
10964 		if (ctxt->sax2)
10965 #endif /* LIBXML_SAX1_ENABLED */
10966 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10967 #ifdef LIBXML_SAX1_ENABLED
10968 		else
10969 		    name = xmlParseStartTag(ctxt);
10970 #endif /* LIBXML_SAX1_ENABLED */
10971 		if (name == NULL) {
10972 		    spacePop(ctxt);
10973 		    ctxt->instate = XML_PARSER_EOF;
10974 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10975 			ctxt->sax->endDocument(ctxt->userData);
10976 		    goto done;
10977 		}
10978 #ifdef LIBXML_VALID_ENABLED
10979 		/*
10980 		 * [ VC: Root Element Type ]
10981 		 * The Name in the document type declaration must match
10982 		 * the element type of the root element.
10983 		 */
10984 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10985 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
10986 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10987 #endif /* LIBXML_VALID_ENABLED */
10988 
10989 		/*
10990 		 * Check for an Empty Element.
10991 		 */
10992 		if ((RAW == '/') && (NXT(1) == '>')) {
10993 		    SKIP(2);
10994 
10995 		    if (ctxt->sax2) {
10996 			if ((ctxt->sax != NULL) &&
10997 			    (ctxt->sax->endElementNs != NULL) &&
10998 			    (!ctxt->disableSAX))
10999 			    ctxt->sax->endElementNs(ctxt->userData, name,
11000 			                            prefix, URI);
11001 			if (ctxt->nsNr - nsNr > 0)
11002 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11003 #ifdef LIBXML_SAX1_ENABLED
11004 		    } else {
11005 			if ((ctxt->sax != NULL) &&
11006 			    (ctxt->sax->endElement != NULL) &&
11007 			    (!ctxt->disableSAX))
11008 			    ctxt->sax->endElement(ctxt->userData, name);
11009 #endif /* LIBXML_SAX1_ENABLED */
11010 		    }
11011 		    spacePop(ctxt);
11012 		    if (ctxt->nameNr == 0) {
11013 			ctxt->instate = XML_PARSER_EPILOG;
11014 		    } else {
11015 			ctxt->instate = XML_PARSER_CONTENT;
11016 		    }
11017 		    break;
11018 		}
11019 		if (RAW == '>') {
11020 		    NEXT;
11021 		} else {
11022 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11023 					 "Couldn't find end of Start Tag %s\n",
11024 					 name);
11025 		    nodePop(ctxt);
11026 		    spacePop(ctxt);
11027 		}
11028 		if (ctxt->sax2)
11029 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11030 #ifdef LIBXML_SAX1_ENABLED
11031 		else
11032 		    namePush(ctxt, name);
11033 #endif /* LIBXML_SAX1_ENABLED */
11034 
11035 		ctxt->instate = XML_PARSER_CONTENT;
11036                 break;
11037 	    }
11038             case XML_PARSER_CONTENT: {
11039 		const xmlChar *test;
11040 		unsigned int cons;
11041 		if ((avail < 2) && (ctxt->inputNr == 1))
11042 		    goto done;
11043 		cur = ctxt->input->cur[0];
11044 		next = ctxt->input->cur[1];
11045 
11046 		test = CUR_PTR;
11047 	        cons = ctxt->input->consumed;
11048 		if ((cur == '<') && (next == '/')) {
11049 		    ctxt->instate = XML_PARSER_END_TAG;
11050 		    break;
11051 	        } else if ((cur == '<') && (next == '?')) {
11052 		    if ((!terminate) &&
11053 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11054 			goto done;
11055 		    xmlParsePI(ctxt);
11056 		} else if ((cur == '<') && (next != '!')) {
11057 		    ctxt->instate = XML_PARSER_START_TAG;
11058 		    break;
11059 		} else if ((cur == '<') && (next == '!') &&
11060 		           (ctxt->input->cur[2] == '-') &&
11061 			   (ctxt->input->cur[3] == '-')) {
11062 		    int term;
11063 
11064 	            if (avail < 4)
11065 		        goto done;
11066 		    ctxt->input->cur += 4;
11067 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11068 		    ctxt->input->cur -= 4;
11069 		    if ((!terminate) && (term < 0))
11070 			goto done;
11071 		    xmlParseComment(ctxt);
11072 		    ctxt->instate = XML_PARSER_CONTENT;
11073 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11074 		    (ctxt->input->cur[2] == '[') &&
11075 		    (ctxt->input->cur[3] == 'C') &&
11076 		    (ctxt->input->cur[4] == 'D') &&
11077 		    (ctxt->input->cur[5] == 'A') &&
11078 		    (ctxt->input->cur[6] == 'T') &&
11079 		    (ctxt->input->cur[7] == 'A') &&
11080 		    (ctxt->input->cur[8] == '[')) {
11081 		    SKIP(9);
11082 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11083 		    break;
11084 		} else if ((cur == '<') && (next == '!') &&
11085 		           (avail < 9)) {
11086 		    goto done;
11087 		} else if (cur == '&') {
11088 		    if ((!terminate) &&
11089 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11090 			goto done;
11091 		    xmlParseReference(ctxt);
11092 		} else {
11093 		    /* TODO Avoid the extra copy, handle directly !!! */
11094 		    /*
11095 		     * Goal of the following test is:
11096 		     *  - minimize calls to the SAX 'character' callback
11097 		     *    when they are mergeable
11098 		     *  - handle a problem for isBlank when we only parse
11099 		     *    a sequence of blank chars and the next one is
11100 		     *    not available to check against '<' presence.
11101 		     *  - tries to homogenize the differences in SAX
11102 		     *    callbacks between the push and pull versions
11103 		     *    of the parser.
11104 		     */
11105 		    if ((ctxt->inputNr == 1) &&
11106 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11107 			if (!terminate) {
11108 			    if (ctxt->progressive) {
11109 				if ((lastlt == NULL) ||
11110 				    (ctxt->input->cur > lastlt))
11111 				    goto done;
11112 			    } else if (xmlParseLookupSequence(ctxt,
11113 			                                      '<', 0, 0) < 0) {
11114 				goto done;
11115 			    }
11116 			}
11117                     }
11118 		    ctxt->checkIndex = 0;
11119 		    xmlParseCharData(ctxt, 0);
11120 		}
11121 		/*
11122 		 * Pop-up of finished entities.
11123 		 */
11124 		while ((RAW == 0) && (ctxt->inputNr > 1))
11125 		    xmlPopInput(ctxt);
11126 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11127 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11128 		                "detected an error in element content\n");
11129 		    ctxt->instate = XML_PARSER_EOF;
11130 		    break;
11131 		}
11132 		break;
11133 	    }
11134             case XML_PARSER_END_TAG:
11135 		if (avail < 2)
11136 		    goto done;
11137 		if (!terminate) {
11138 		    if (ctxt->progressive) {
11139 		        /* > can be found unescaped in attribute values */
11140 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11141 			    goto done;
11142 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11143 			goto done;
11144 		    }
11145 		}
11146 		if (ctxt->sax2) {
11147 		    xmlParseEndTag2(ctxt,
11148 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11149 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11150 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11151 		    nameNsPop(ctxt);
11152 		}
11153 #ifdef LIBXML_SAX1_ENABLED
11154 		  else
11155 		    xmlParseEndTag1(ctxt, 0);
11156 #endif /* LIBXML_SAX1_ENABLED */
11157 		if (ctxt->nameNr == 0) {
11158 		    ctxt->instate = XML_PARSER_EPILOG;
11159 		} else {
11160 		    ctxt->instate = XML_PARSER_CONTENT;
11161 		}
11162 		break;
11163             case XML_PARSER_CDATA_SECTION: {
11164 	        /*
11165 		 * The Push mode need to have the SAX callback for
11166 		 * cdataBlock merge back contiguous callbacks.
11167 		 */
11168 		int base;
11169 
11170 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11171 		if (base < 0) {
11172 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11173 		        int tmp;
11174 
11175 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11176 			                        XML_PARSER_BIG_BUFFER_SIZE);
11177 			if (tmp < 0) {
11178 			    tmp = -tmp;
11179 			    ctxt->input->cur += tmp;
11180 			    goto encoding_error;
11181 			}
11182 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11183 			    if (ctxt->sax->cdataBlock != NULL)
11184 				ctxt->sax->cdataBlock(ctxt->userData,
11185 				                      ctxt->input->cur, tmp);
11186 			    else if (ctxt->sax->characters != NULL)
11187 				ctxt->sax->characters(ctxt->userData,
11188 				                      ctxt->input->cur, tmp);
11189 			}
11190 			SKIPL(tmp);
11191 			ctxt->checkIndex = 0;
11192 		    }
11193 		    goto done;
11194 		} else {
11195 		    int tmp;
11196 
11197 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11198 		    if ((tmp < 0) || (tmp != base)) {
11199 			tmp = -tmp;
11200 			ctxt->input->cur += tmp;
11201 			goto encoding_error;
11202 		    }
11203 		    if ((ctxt->sax != NULL) && (base == 0) &&
11204 		        (ctxt->sax->cdataBlock != NULL) &&
11205 		        (!ctxt->disableSAX)) {
11206 			/*
11207 			 * Special case to provide identical behaviour
11208 			 * between pull and push parsers on empty CDATA
11209 			 * sections
11210 			 */
11211 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11212 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11213 			               "<![CDATA[", 9)))
11214 			     ctxt->sax->cdataBlock(ctxt->userData,
11215 			                           BAD_CAST "", 0);
11216 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11217 			(!ctxt->disableSAX)) {
11218 			if (ctxt->sax->cdataBlock != NULL)
11219 			    ctxt->sax->cdataBlock(ctxt->userData,
11220 						  ctxt->input->cur, base);
11221 			else if (ctxt->sax->characters != NULL)
11222 			    ctxt->sax->characters(ctxt->userData,
11223 						  ctxt->input->cur, base);
11224 		    }
11225 		    SKIPL(base + 3);
11226 		    ctxt->checkIndex = 0;
11227 		    ctxt->instate = XML_PARSER_CONTENT;
11228 #ifdef DEBUG_PUSH
11229 		    xmlGenericError(xmlGenericErrorContext,
11230 			    "PP: entering CONTENT\n");
11231 #endif
11232 		}
11233 		break;
11234 	    }
11235             case XML_PARSER_MISC:
11236 		SKIP_BLANKS;
11237 		if (ctxt->input->buf == NULL)
11238 		    avail = ctxt->input->length -
11239 		            (ctxt->input->cur - ctxt->input->base);
11240 		else
11241 		    avail = ctxt->input->buf->buffer->use -
11242 		            (ctxt->input->cur - ctxt->input->base);
11243 		if (avail < 2)
11244 		    goto done;
11245 		cur = ctxt->input->cur[0];
11246 		next = ctxt->input->cur[1];
11247 	        if ((cur == '<') && (next == '?')) {
11248 		    if ((!terminate) &&
11249 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11250 			goto done;
11251 #ifdef DEBUG_PUSH
11252 		    xmlGenericError(xmlGenericErrorContext,
11253 			    "PP: Parsing PI\n");
11254 #endif
11255 		    xmlParsePI(ctxt);
11256 		    ctxt->checkIndex = 0;
11257 		} else if ((cur == '<') && (next == '!') &&
11258 		    (ctxt->input->cur[2] == '-') &&
11259 		    (ctxt->input->cur[3] == '-')) {
11260 		    if ((!terminate) &&
11261 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11262 			goto done;
11263 #ifdef DEBUG_PUSH
11264 		    xmlGenericError(xmlGenericErrorContext,
11265 			    "PP: Parsing Comment\n");
11266 #endif
11267 		    xmlParseComment(ctxt);
11268 		    ctxt->instate = XML_PARSER_MISC;
11269 		    ctxt->checkIndex = 0;
11270 		} else if ((cur == '<') && (next == '!') &&
11271 		    (ctxt->input->cur[2] == 'D') &&
11272 		    (ctxt->input->cur[3] == 'O') &&
11273 		    (ctxt->input->cur[4] == 'C') &&
11274 		    (ctxt->input->cur[5] == 'T') &&
11275 		    (ctxt->input->cur[6] == 'Y') &&
11276 		    (ctxt->input->cur[7] == 'P') &&
11277 		    (ctxt->input->cur[8] == 'E')) {
11278 		    if ((!terminate) &&
11279 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11280 			goto done;
11281 #ifdef DEBUG_PUSH
11282 		    xmlGenericError(xmlGenericErrorContext,
11283 			    "PP: Parsing internal subset\n");
11284 #endif
11285 		    ctxt->inSubset = 1;
11286 		    xmlParseDocTypeDecl(ctxt);
11287 		    if (RAW == '[') {
11288 			ctxt->instate = XML_PARSER_DTD;
11289 #ifdef DEBUG_PUSH
11290 			xmlGenericError(xmlGenericErrorContext,
11291 				"PP: entering DTD\n");
11292 #endif
11293 		    } else {
11294 			/*
11295 			 * Create and update the external subset.
11296 			 */
11297 			ctxt->inSubset = 2;
11298 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11299 			    (ctxt->sax->externalSubset != NULL))
11300 			    ctxt->sax->externalSubset(ctxt->userData,
11301 				    ctxt->intSubName, ctxt->extSubSystem,
11302 				    ctxt->extSubURI);
11303 			ctxt->inSubset = 0;
11304 			xmlCleanSpecialAttr(ctxt);
11305 			ctxt->instate = XML_PARSER_PROLOG;
11306 #ifdef DEBUG_PUSH
11307 			xmlGenericError(xmlGenericErrorContext,
11308 				"PP: entering PROLOG\n");
11309 #endif
11310 		    }
11311 		} else if ((cur == '<') && (next == '!') &&
11312 		           (avail < 9)) {
11313 		    goto done;
11314 		} else {
11315 		    ctxt->instate = XML_PARSER_START_TAG;
11316 		    ctxt->progressive = 1;
11317 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11318 #ifdef DEBUG_PUSH
11319 		    xmlGenericError(xmlGenericErrorContext,
11320 			    "PP: entering START_TAG\n");
11321 #endif
11322 		}
11323 		break;
11324             case XML_PARSER_PROLOG:
11325 		SKIP_BLANKS;
11326 		if (ctxt->input->buf == NULL)
11327 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11328 		else
11329 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11330 		if (avail < 2)
11331 		    goto done;
11332 		cur = ctxt->input->cur[0];
11333 		next = ctxt->input->cur[1];
11334 	        if ((cur == '<') && (next == '?')) {
11335 		    if ((!terminate) &&
11336 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11337 			goto done;
11338 #ifdef DEBUG_PUSH
11339 		    xmlGenericError(xmlGenericErrorContext,
11340 			    "PP: Parsing PI\n");
11341 #endif
11342 		    xmlParsePI(ctxt);
11343 		} else if ((cur == '<') && (next == '!') &&
11344 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11345 		    if ((!terminate) &&
11346 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11347 			goto done;
11348 #ifdef DEBUG_PUSH
11349 		    xmlGenericError(xmlGenericErrorContext,
11350 			    "PP: Parsing Comment\n");
11351 #endif
11352 		    xmlParseComment(ctxt);
11353 		    ctxt->instate = XML_PARSER_PROLOG;
11354 		} else if ((cur == '<') && (next == '!') &&
11355 		           (avail < 4)) {
11356 		    goto done;
11357 		} else {
11358 		    ctxt->instate = XML_PARSER_START_TAG;
11359 		    if (ctxt->progressive == 0)
11360 			ctxt->progressive = 1;
11361 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11362 #ifdef DEBUG_PUSH
11363 		    xmlGenericError(xmlGenericErrorContext,
11364 			    "PP: entering START_TAG\n");
11365 #endif
11366 		}
11367 		break;
11368             case XML_PARSER_EPILOG:
11369 		SKIP_BLANKS;
11370 		if (ctxt->input->buf == NULL)
11371 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11372 		else
11373 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11374 		if (avail < 2)
11375 		    goto done;
11376 		cur = ctxt->input->cur[0];
11377 		next = ctxt->input->cur[1];
11378 	        if ((cur == '<') && (next == '?')) {
11379 		    if ((!terminate) &&
11380 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 			goto done;
11382 #ifdef DEBUG_PUSH
11383 		    xmlGenericError(xmlGenericErrorContext,
11384 			    "PP: Parsing PI\n");
11385 #endif
11386 		    xmlParsePI(ctxt);
11387 		    ctxt->instate = XML_PARSER_EPILOG;
11388 		} else if ((cur == '<') && (next == '!') &&
11389 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11390 		    if ((!terminate) &&
11391 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11392 			goto done;
11393 #ifdef DEBUG_PUSH
11394 		    xmlGenericError(xmlGenericErrorContext,
11395 			    "PP: Parsing Comment\n");
11396 #endif
11397 		    xmlParseComment(ctxt);
11398 		    ctxt->instate = XML_PARSER_EPILOG;
11399 		} else if ((cur == '<') && (next == '!') &&
11400 		           (avail < 4)) {
11401 		    goto done;
11402 		} else {
11403 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11404 		    ctxt->instate = XML_PARSER_EOF;
11405 #ifdef DEBUG_PUSH
11406 		    xmlGenericError(xmlGenericErrorContext,
11407 			    "PP: entering EOF\n");
11408 #endif
11409 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11410 			ctxt->sax->endDocument(ctxt->userData);
11411 		    goto done;
11412 		}
11413 		break;
11414             case XML_PARSER_DTD: {
11415 	        /*
11416 		 * Sorry but progressive parsing of the internal subset
11417 		 * is not expected to be supported. We first check that
11418 		 * the full content of the internal subset is available and
11419 		 * the parsing is launched only at that point.
11420 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11421 		 * section and not in a ']]>' sequence which are conditional
11422 		 * sections (whoever argued to keep that crap in XML deserve
11423 		 * a place in hell !).
11424 		 */
11425 		int base, i;
11426 		xmlChar *buf;
11427 	        xmlChar quote = 0;
11428 
11429 		base = ctxt->input->cur - ctxt->input->base;
11430 		if (base < 0) return(0);
11431 		if (ctxt->checkIndex > base)
11432 		    base = ctxt->checkIndex;
11433 		buf = ctxt->input->buf->buffer->content;
11434 		for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11435 		     base++) {
11436 		    if (quote != 0) {
11437 		        if (buf[base] == quote)
11438 			    quote = 0;
11439 			continue;
11440 		    }
11441 		    if ((quote == 0) && (buf[base] == '<')) {
11442 		        int found  = 0;
11443 			/* special handling of comments */
11444 		        if (((unsigned int) base + 4 <
11445 			     ctxt->input->buf->buffer->use) &&
11446 			    (buf[base + 1] == '!') &&
11447 			    (buf[base + 2] == '-') &&
11448 			    (buf[base + 3] == '-')) {
11449 			    for (;(unsigned int) base + 3 <
11450 			          ctxt->input->buf->buffer->use; base++) {
11451 				if ((buf[base] == '-') &&
11452 				    (buf[base + 1] == '-') &&
11453 				    (buf[base + 2] == '>')) {
11454 				    found = 1;
11455 				    base += 2;
11456 				    break;
11457 				}
11458 		            }
11459 			    if (!found) {
11460 #if 0
11461 			        fprintf(stderr, "unfinished comment\n");
11462 #endif
11463 			        break; /* for */
11464 		            }
11465 		            continue;
11466 			}
11467 		    }
11468 		    if (buf[base] == '"') {
11469 		        quote = '"';
11470 			continue;
11471 		    }
11472 		    if (buf[base] == '\'') {
11473 		        quote = '\'';
11474 			continue;
11475 		    }
11476 		    if (buf[base] == ']') {
11477 #if 0
11478 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11479 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11480 #endif
11481 		        if ((unsigned int) base +1 >=
11482 		            ctxt->input->buf->buffer->use)
11483 			    break;
11484 			if (buf[base + 1] == ']') {
11485 			    /* conditional crap, skip both ']' ! */
11486 			    base++;
11487 			    continue;
11488 			}
11489 		        for (i = 1;
11490 		     (unsigned int) base + i < ctxt->input->buf->buffer->use;
11491 		             i++) {
11492 			    if (buf[base + i] == '>') {
11493 #if 0
11494 			        fprintf(stderr, "found\n");
11495 #endif
11496 			        goto found_end_int_subset;
11497 			    }
11498 			    if (!IS_BLANK_CH(buf[base + i])) {
11499 #if 0
11500 			        fprintf(stderr, "not found\n");
11501 #endif
11502 			        goto not_end_of_int_subset;
11503 			    }
11504 			}
11505 #if 0
11506 			fprintf(stderr, "end of stream\n");
11507 #endif
11508 		        break;
11509 
11510 		    }
11511 not_end_of_int_subset:
11512                     continue; /* for */
11513 		}
11514 		/*
11515 		 * We didn't find the end of the Internal subset
11516 		 */
11517 #ifdef DEBUG_PUSH
11518 		if (next == 0)
11519 		    xmlGenericError(xmlGenericErrorContext,
11520 			    "PP: lookup of int subset end filed\n");
11521 #endif
11522 	        goto done;
11523 
11524 found_end_int_subset:
11525 		xmlParseInternalSubset(ctxt);
11526 		ctxt->inSubset = 2;
11527 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11528 		    (ctxt->sax->externalSubset != NULL))
11529 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11530 			    ctxt->extSubSystem, ctxt->extSubURI);
11531 		ctxt->inSubset = 0;
11532 		xmlCleanSpecialAttr(ctxt);
11533 		ctxt->instate = XML_PARSER_PROLOG;
11534 		ctxt->checkIndex = 0;
11535 #ifdef DEBUG_PUSH
11536 		xmlGenericError(xmlGenericErrorContext,
11537 			"PP: entering PROLOG\n");
11538 #endif
11539                 break;
11540 	    }
11541             case XML_PARSER_COMMENT:
11542 		xmlGenericError(xmlGenericErrorContext,
11543 			"PP: internal error, state == COMMENT\n");
11544 		ctxt->instate = XML_PARSER_CONTENT;
11545 #ifdef DEBUG_PUSH
11546 		xmlGenericError(xmlGenericErrorContext,
11547 			"PP: entering CONTENT\n");
11548 #endif
11549 		break;
11550             case XML_PARSER_IGNORE:
11551 		xmlGenericError(xmlGenericErrorContext,
11552 			"PP: internal error, state == IGNORE");
11553 	        ctxt->instate = XML_PARSER_DTD;
11554 #ifdef DEBUG_PUSH
11555 		xmlGenericError(xmlGenericErrorContext,
11556 			"PP: entering DTD\n");
11557 #endif
11558 	        break;
11559             case XML_PARSER_PI:
11560 		xmlGenericError(xmlGenericErrorContext,
11561 			"PP: internal error, state == PI\n");
11562 		ctxt->instate = XML_PARSER_CONTENT;
11563 #ifdef DEBUG_PUSH
11564 		xmlGenericError(xmlGenericErrorContext,
11565 			"PP: entering CONTENT\n");
11566 #endif
11567 		break;
11568             case XML_PARSER_ENTITY_DECL:
11569 		xmlGenericError(xmlGenericErrorContext,
11570 			"PP: internal error, state == ENTITY_DECL\n");
11571 		ctxt->instate = XML_PARSER_DTD;
11572 #ifdef DEBUG_PUSH
11573 		xmlGenericError(xmlGenericErrorContext,
11574 			"PP: entering DTD\n");
11575 #endif
11576 		break;
11577             case XML_PARSER_ENTITY_VALUE:
11578 		xmlGenericError(xmlGenericErrorContext,
11579 			"PP: internal error, state == ENTITY_VALUE\n");
11580 		ctxt->instate = XML_PARSER_CONTENT;
11581 #ifdef DEBUG_PUSH
11582 		xmlGenericError(xmlGenericErrorContext,
11583 			"PP: entering DTD\n");
11584 #endif
11585 		break;
11586             case XML_PARSER_ATTRIBUTE_VALUE:
11587 		xmlGenericError(xmlGenericErrorContext,
11588 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
11589 		ctxt->instate = XML_PARSER_START_TAG;
11590 #ifdef DEBUG_PUSH
11591 		xmlGenericError(xmlGenericErrorContext,
11592 			"PP: entering START_TAG\n");
11593 #endif
11594 		break;
11595             case XML_PARSER_SYSTEM_LITERAL:
11596 		xmlGenericError(xmlGenericErrorContext,
11597 			"PP: internal error, state == SYSTEM_LITERAL\n");
11598 		ctxt->instate = XML_PARSER_START_TAG;
11599 #ifdef DEBUG_PUSH
11600 		xmlGenericError(xmlGenericErrorContext,
11601 			"PP: entering START_TAG\n");
11602 #endif
11603 		break;
11604             case XML_PARSER_PUBLIC_LITERAL:
11605 		xmlGenericError(xmlGenericErrorContext,
11606 			"PP: internal error, state == PUBLIC_LITERAL\n");
11607 		ctxt->instate = XML_PARSER_START_TAG;
11608 #ifdef DEBUG_PUSH
11609 		xmlGenericError(xmlGenericErrorContext,
11610 			"PP: entering START_TAG\n");
11611 #endif
11612 		break;
11613 	}
11614     }
11615 done:
11616 #ifdef DEBUG_PUSH
11617     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11618 #endif
11619     return(ret);
11620 encoding_error:
11621     {
11622         char buffer[150];
11623 
11624 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11625 			ctxt->input->cur[0], ctxt->input->cur[1],
11626 			ctxt->input->cur[2], ctxt->input->cur[3]);
11627 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11628 		     "Input is not proper UTF-8, indicate encoding !\n%s",
11629 		     BAD_CAST buffer, NULL);
11630     }
11631     return(0);
11632 }
11633 
11634 /**
11635  * xmlParseChunk:
11636  * @ctxt:  an XML parser context
11637  * @chunk:  an char array
11638  * @size:  the size in byte of the chunk
11639  * @terminate:  last chunk indicator
11640  *
11641  * Parse a Chunk of memory
11642  *
11643  * Returns zero if no error, the xmlParserErrors otherwise.
11644  */
11645 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11646 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11647               int terminate) {
11648     int end_in_lf = 0;
11649     int remain = 0;
11650 
11651     if (ctxt == NULL)
11652         return(XML_ERR_INTERNAL_ERROR);
11653     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11654         return(ctxt->errNo);
11655     if (ctxt->instate == XML_PARSER_START)
11656         xmlDetectSAX2(ctxt);
11657     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11658         (chunk[size - 1] == '\r')) {
11659 	end_in_lf = 1;
11660 	size--;
11661     }
11662 
11663 xmldecl_done:
11664 
11665     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11666         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11667 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11668 	int cur = ctxt->input->cur - ctxt->input->base;
11669 	int res;
11670 
11671         /*
11672          * Specific handling if we autodetected an encoding, we should not
11673          * push more than the first line ... which depend on the encoding
11674          * And only push the rest once the final encoding was detected
11675          */
11676         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11677             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11678             unsigned int len = 45;
11679 
11680             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11681                                BAD_CAST "UTF-16")) ||
11682                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11683                                BAD_CAST "UTF16")))
11684                 len = 90;
11685             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11686                                     BAD_CAST "UCS-4")) ||
11687                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11688                                     BAD_CAST "UCS4")))
11689                 len = 180;
11690 
11691             if (ctxt->input->buf->rawconsumed < len)
11692                 len -= ctxt->input->buf->rawconsumed;
11693 
11694             /*
11695              * Change size for reading the initial declaration only
11696              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11697              * will blindly copy extra bytes from memory.
11698              */
11699             if ((unsigned int) size > len) {
11700                 remain = size - len;
11701                 size = len;
11702             } else {
11703                 remain = 0;
11704             }
11705         }
11706 	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11707 	if (res < 0) {
11708 	    ctxt->errNo = XML_PARSER_EOF;
11709 	    ctxt->disableSAX = 1;
11710 	    return (XML_PARSER_EOF);
11711 	}
11712 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11713 	ctxt->input->cur = ctxt->input->base + cur;
11714 	ctxt->input->end =
11715 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11716 #ifdef DEBUG_PUSH
11717 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11718 #endif
11719 
11720     } else if (ctxt->instate != XML_PARSER_EOF) {
11721 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11722 	    xmlParserInputBufferPtr in = ctxt->input->buf;
11723 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
11724 		    (in->raw != NULL)) {
11725 		int nbchars;
11726 
11727 		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11728 		if (nbchars < 0) {
11729 		    /* TODO 2.6.0 */
11730 		    xmlGenericError(xmlGenericErrorContext,
11731 				    "xmlParseChunk: encoder error\n");
11732 		    return(XML_ERR_INVALID_ENCODING);
11733 		}
11734 	    }
11735 	}
11736     }
11737     if (remain != 0)
11738         xmlParseTryOrFinish(ctxt, 0);
11739     else
11740         xmlParseTryOrFinish(ctxt, terminate);
11741     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11742         return(ctxt->errNo);
11743 
11744     if (remain != 0) {
11745         chunk += size;
11746         size = remain;
11747         remain = 0;
11748         goto xmldecl_done;
11749     }
11750     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11751         (ctxt->input->buf != NULL)) {
11752 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11753     }
11754     if (terminate) {
11755 	/*
11756 	 * Check for termination
11757 	 */
11758 	int avail = 0;
11759 
11760 	if (ctxt->input != NULL) {
11761 	    if (ctxt->input->buf == NULL)
11762 		avail = ctxt->input->length -
11763 			(ctxt->input->cur - ctxt->input->base);
11764 	    else
11765 		avail = ctxt->input->buf->buffer->use -
11766 			(ctxt->input->cur - ctxt->input->base);
11767 	}
11768 
11769 	if ((ctxt->instate != XML_PARSER_EOF) &&
11770 	    (ctxt->instate != XML_PARSER_EPILOG)) {
11771 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11772 	}
11773 	if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11774 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11775 	}
11776 	if (ctxt->instate != XML_PARSER_EOF) {
11777 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11778 		ctxt->sax->endDocument(ctxt->userData);
11779 	}
11780 	ctxt->instate = XML_PARSER_EOF;
11781     }
11782     return((xmlParserErrors) ctxt->errNo);
11783 }
11784 
11785 /************************************************************************
11786  *									*
11787  * 		I/O front end functions to the parser			*
11788  *									*
11789  ************************************************************************/
11790 
11791 /**
11792  * xmlCreatePushParserCtxt:
11793  * @sax:  a SAX handler
11794  * @user_data:  The user data returned on SAX callbacks
11795  * @chunk:  a pointer to an array of chars
11796  * @size:  number of chars in the array
11797  * @filename:  an optional file name or URI
11798  *
11799  * Create a parser context for using the XML parser in push mode.
11800  * If @buffer and @size are non-NULL, the data is used to detect
11801  * the encoding.  The remaining characters will be parsed so they
11802  * don't need to be fed in again through xmlParseChunk.
11803  * To allow content encoding detection, @size should be >= 4
11804  * The value of @filename is used for fetching external entities
11805  * and error/warning reports.
11806  *
11807  * Returns the new parser context or NULL
11808  */
11809 
11810 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11811 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11812                         const char *chunk, int size, const char *filename) {
11813     xmlParserCtxtPtr ctxt;
11814     xmlParserInputPtr inputStream;
11815     xmlParserInputBufferPtr buf;
11816     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11817 
11818     /*
11819      * plug some encoding conversion routines
11820      */
11821     if ((chunk != NULL) && (size >= 4))
11822 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11823 
11824     buf = xmlAllocParserInputBuffer(enc);
11825     if (buf == NULL) return(NULL);
11826 
11827     ctxt = xmlNewParserCtxt();
11828     if (ctxt == NULL) {
11829         xmlErrMemory(NULL, "creating parser: out of memory\n");
11830 	xmlFreeParserInputBuffer(buf);
11831 	return(NULL);
11832     }
11833     ctxt->dictNames = 1;
11834     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11835     if (ctxt->pushTab == NULL) {
11836         xmlErrMemory(ctxt, NULL);
11837 	xmlFreeParserInputBuffer(buf);
11838 	xmlFreeParserCtxt(ctxt);
11839 	return(NULL);
11840     }
11841     if (sax != NULL) {
11842 #ifdef LIBXML_SAX1_ENABLED
11843 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11844 #endif /* LIBXML_SAX1_ENABLED */
11845 	    xmlFree(ctxt->sax);
11846 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11847 	if (ctxt->sax == NULL) {
11848 	    xmlErrMemory(ctxt, NULL);
11849 	    xmlFreeParserInputBuffer(buf);
11850 	    xmlFreeParserCtxt(ctxt);
11851 	    return(NULL);
11852 	}
11853 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11854 	if (sax->initialized == XML_SAX2_MAGIC)
11855 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11856 	else
11857 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11858 	if (user_data != NULL)
11859 	    ctxt->userData = user_data;
11860     }
11861     if (filename == NULL) {
11862 	ctxt->directory = NULL;
11863     } else {
11864         ctxt->directory = xmlParserGetDirectory(filename);
11865     }
11866 
11867     inputStream = xmlNewInputStream(ctxt);
11868     if (inputStream == NULL) {
11869 	xmlFreeParserCtxt(ctxt);
11870 	xmlFreeParserInputBuffer(buf);
11871 	return(NULL);
11872     }
11873 
11874     if (filename == NULL)
11875 	inputStream->filename = NULL;
11876     else {
11877 	inputStream->filename = (char *)
11878 	    xmlCanonicPath((const xmlChar *) filename);
11879 	if (inputStream->filename == NULL) {
11880 	    xmlFreeParserCtxt(ctxt);
11881 	    xmlFreeParserInputBuffer(buf);
11882 	    return(NULL);
11883 	}
11884     }
11885     inputStream->buf = buf;
11886     inputStream->base = inputStream->buf->buffer->content;
11887     inputStream->cur = inputStream->buf->buffer->content;
11888     inputStream->end =
11889 	&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11890 
11891     inputPush(ctxt, inputStream);
11892 
11893     /*
11894      * If the caller didn't provide an initial 'chunk' for determining
11895      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11896      * that it can be automatically determined later
11897      */
11898     if ((size == 0) || (chunk == NULL)) {
11899 	ctxt->charset = XML_CHAR_ENCODING_NONE;
11900     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11901 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11902 	int cur = ctxt->input->cur - ctxt->input->base;
11903 
11904 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11905 
11906 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11907 	ctxt->input->cur = ctxt->input->base + cur;
11908 	ctxt->input->end =
11909 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11910 #ifdef DEBUG_PUSH
11911 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11912 #endif
11913     }
11914 
11915     if (enc != XML_CHAR_ENCODING_NONE) {
11916         xmlSwitchEncoding(ctxt, enc);
11917     }
11918 
11919     return(ctxt);
11920 }
11921 #endif /* LIBXML_PUSH_ENABLED */
11922 
11923 /**
11924  * xmlStopParser:
11925  * @ctxt:  an XML parser context
11926  *
11927  * Blocks further parser processing
11928  */
11929 void
xmlStopParser(xmlParserCtxtPtr ctxt)11930 xmlStopParser(xmlParserCtxtPtr ctxt) {
11931     if (ctxt == NULL)
11932         return;
11933     ctxt->instate = XML_PARSER_EOF;
11934     ctxt->disableSAX = 1;
11935     if (ctxt->input != NULL) {
11936 	ctxt->input->cur = BAD_CAST"";
11937 	ctxt->input->base = ctxt->input->cur;
11938     }
11939 }
11940 
11941 /**
11942  * xmlCreateIOParserCtxt:
11943  * @sax:  a SAX handler
11944  * @user_data:  The user data returned on SAX callbacks
11945  * @ioread:  an I/O read function
11946  * @ioclose:  an I/O close function
11947  * @ioctx:  an I/O handler
11948  * @enc:  the charset encoding if known
11949  *
11950  * Create a parser context for using the XML parser with an existing
11951  * I/O stream
11952  *
11953  * Returns the new parser context or NULL
11954  */
11955 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11956 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11957 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
11958 	void *ioctx, xmlCharEncoding enc) {
11959     xmlParserCtxtPtr ctxt;
11960     xmlParserInputPtr inputStream;
11961     xmlParserInputBufferPtr buf;
11962 
11963     if (ioread == NULL) return(NULL);
11964 
11965     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11966     if (buf == NULL) return(NULL);
11967 
11968     ctxt = xmlNewParserCtxt();
11969     if (ctxt == NULL) {
11970 	xmlFreeParserInputBuffer(buf);
11971 	return(NULL);
11972     }
11973     if (sax != NULL) {
11974 #ifdef LIBXML_SAX1_ENABLED
11975 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11976 #endif /* LIBXML_SAX1_ENABLED */
11977 	    xmlFree(ctxt->sax);
11978 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11979 	if (ctxt->sax == NULL) {
11980 	    xmlErrMemory(ctxt, NULL);
11981 	    xmlFreeParserCtxt(ctxt);
11982 	    return(NULL);
11983 	}
11984 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11985 	if (sax->initialized == XML_SAX2_MAGIC)
11986 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11987 	else
11988 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11989 	if (user_data != NULL)
11990 	    ctxt->userData = user_data;
11991     }
11992 
11993     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11994     if (inputStream == NULL) {
11995 	xmlFreeParserCtxt(ctxt);
11996 	return(NULL);
11997     }
11998     inputPush(ctxt, inputStream);
11999 
12000     return(ctxt);
12001 }
12002 
12003 #ifdef LIBXML_VALID_ENABLED
12004 /************************************************************************
12005  *									*
12006  * 		Front ends when parsing a DTD				*
12007  *									*
12008  ************************************************************************/
12009 
12010 /**
12011  * xmlIOParseDTD:
12012  * @sax:  the SAX handler block or NULL
12013  * @input:  an Input Buffer
12014  * @enc:  the charset encoding if known
12015  *
12016  * Load and parse a DTD
12017  *
12018  * Returns the resulting xmlDtdPtr or NULL in case of error.
12019  * @input will be freed by the function in any case.
12020  */
12021 
12022 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12023 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12024 	      xmlCharEncoding enc) {
12025     xmlDtdPtr ret = NULL;
12026     xmlParserCtxtPtr ctxt;
12027     xmlParserInputPtr pinput = NULL;
12028     xmlChar start[4];
12029 
12030     if (input == NULL)
12031 	return(NULL);
12032 
12033     ctxt = xmlNewParserCtxt();
12034     if (ctxt == NULL) {
12035         xmlFreeParserInputBuffer(input);
12036 	return(NULL);
12037     }
12038 
12039     /*
12040      * Set-up the SAX context
12041      */
12042     if (sax != NULL) {
12043 	if (ctxt->sax != NULL)
12044 	    xmlFree(ctxt->sax);
12045         ctxt->sax = sax;
12046         ctxt->userData = ctxt;
12047     }
12048     xmlDetectSAX2(ctxt);
12049 
12050     /*
12051      * generate a parser input from the I/O handler
12052      */
12053 
12054     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12055     if (pinput == NULL) {
12056         if (sax != NULL) ctxt->sax = NULL;
12057         xmlFreeParserInputBuffer(input);
12058 	xmlFreeParserCtxt(ctxt);
12059 	return(NULL);
12060     }
12061 
12062     /*
12063      * plug some encoding conversion routines here.
12064      */
12065     if (xmlPushInput(ctxt, pinput) < 0) {
12066         if (sax != NULL) ctxt->sax = NULL;
12067 	xmlFreeParserCtxt(ctxt);
12068 	return(NULL);
12069     }
12070     if (enc != XML_CHAR_ENCODING_NONE) {
12071         xmlSwitchEncoding(ctxt, enc);
12072     }
12073 
12074     pinput->filename = NULL;
12075     pinput->line = 1;
12076     pinput->col = 1;
12077     pinput->base = ctxt->input->cur;
12078     pinput->cur = ctxt->input->cur;
12079     pinput->free = NULL;
12080 
12081     /*
12082      * let's parse that entity knowing it's an external subset.
12083      */
12084     ctxt->inSubset = 2;
12085     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12086     if (ctxt->myDoc == NULL) {
12087 	xmlErrMemory(ctxt, "New Doc failed");
12088 	return(NULL);
12089     }
12090     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12091     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12092 	                               BAD_CAST "none", BAD_CAST "none");
12093 
12094     if ((enc == XML_CHAR_ENCODING_NONE) &&
12095         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12096 	/*
12097 	 * Get the 4 first bytes and decode the charset
12098 	 * if enc != XML_CHAR_ENCODING_NONE
12099 	 * plug some encoding conversion routines.
12100 	 */
12101 	start[0] = RAW;
12102 	start[1] = NXT(1);
12103 	start[2] = NXT(2);
12104 	start[3] = NXT(3);
12105 	enc = xmlDetectCharEncoding(start, 4);
12106 	if (enc != XML_CHAR_ENCODING_NONE) {
12107 	    xmlSwitchEncoding(ctxt, enc);
12108 	}
12109     }
12110 
12111     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12112 
12113     if (ctxt->myDoc != NULL) {
12114 	if (ctxt->wellFormed) {
12115 	    ret = ctxt->myDoc->extSubset;
12116 	    ctxt->myDoc->extSubset = NULL;
12117 	    if (ret != NULL) {
12118 		xmlNodePtr tmp;
12119 
12120 		ret->doc = NULL;
12121 		tmp = ret->children;
12122 		while (tmp != NULL) {
12123 		    tmp->doc = NULL;
12124 		    tmp = tmp->next;
12125 		}
12126 	    }
12127 	} else {
12128 	    ret = NULL;
12129 	}
12130         xmlFreeDoc(ctxt->myDoc);
12131         ctxt->myDoc = NULL;
12132     }
12133     if (sax != NULL) ctxt->sax = NULL;
12134     xmlFreeParserCtxt(ctxt);
12135 
12136     return(ret);
12137 }
12138 
12139 /**
12140  * xmlSAXParseDTD:
12141  * @sax:  the SAX handler block
12142  * @ExternalID:  a NAME* containing the External ID of the DTD
12143  * @SystemID:  a NAME* containing the URL to the DTD
12144  *
12145  * Load and parse an external subset.
12146  *
12147  * Returns the resulting xmlDtdPtr or NULL in case of error.
12148  */
12149 
12150 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12151 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12152                           const xmlChar *SystemID) {
12153     xmlDtdPtr ret = NULL;
12154     xmlParserCtxtPtr ctxt;
12155     xmlParserInputPtr input = NULL;
12156     xmlCharEncoding enc;
12157     xmlChar* systemIdCanonic;
12158 
12159     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12160 
12161     ctxt = xmlNewParserCtxt();
12162     if (ctxt == NULL) {
12163 	return(NULL);
12164     }
12165 
12166     /*
12167      * Set-up the SAX context
12168      */
12169     if (sax != NULL) {
12170 	if (ctxt->sax != NULL)
12171 	    xmlFree(ctxt->sax);
12172         ctxt->sax = sax;
12173         ctxt->userData = ctxt;
12174     }
12175 
12176     /*
12177      * Canonicalise the system ID
12178      */
12179     systemIdCanonic = xmlCanonicPath(SystemID);
12180     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12181 	xmlFreeParserCtxt(ctxt);
12182 	return(NULL);
12183     }
12184 
12185     /*
12186      * Ask the Entity resolver to load the damn thing
12187      */
12188 
12189     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12190 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12191 	                                 systemIdCanonic);
12192     if (input == NULL) {
12193         if (sax != NULL) ctxt->sax = NULL;
12194 	xmlFreeParserCtxt(ctxt);
12195 	if (systemIdCanonic != NULL)
12196 	    xmlFree(systemIdCanonic);
12197 	return(NULL);
12198     }
12199 
12200     /*
12201      * plug some encoding conversion routines here.
12202      */
12203     if (xmlPushInput(ctxt, input) < 0) {
12204         if (sax != NULL) ctxt->sax = NULL;
12205 	xmlFreeParserCtxt(ctxt);
12206 	if (systemIdCanonic != NULL)
12207 	    xmlFree(systemIdCanonic);
12208 	return(NULL);
12209     }
12210     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12211 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12212 	xmlSwitchEncoding(ctxt, enc);
12213     }
12214 
12215     if (input->filename == NULL)
12216 	input->filename = (char *) systemIdCanonic;
12217     else
12218 	xmlFree(systemIdCanonic);
12219     input->line = 1;
12220     input->col = 1;
12221     input->base = ctxt->input->cur;
12222     input->cur = ctxt->input->cur;
12223     input->free = NULL;
12224 
12225     /*
12226      * let's parse that entity knowing it's an external subset.
12227      */
12228     ctxt->inSubset = 2;
12229     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12230     if (ctxt->myDoc == NULL) {
12231 	xmlErrMemory(ctxt, "New Doc failed");
12232         if (sax != NULL) ctxt->sax = NULL;
12233 	xmlFreeParserCtxt(ctxt);
12234 	return(NULL);
12235     }
12236     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12237     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12238 	                               ExternalID, SystemID);
12239     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12240 
12241     if (ctxt->myDoc != NULL) {
12242 	if (ctxt->wellFormed) {
12243 	    ret = ctxt->myDoc->extSubset;
12244 	    ctxt->myDoc->extSubset = NULL;
12245 	    if (ret != NULL) {
12246 		xmlNodePtr tmp;
12247 
12248 		ret->doc = NULL;
12249 		tmp = ret->children;
12250 		while (tmp != NULL) {
12251 		    tmp->doc = NULL;
12252 		    tmp = tmp->next;
12253 		}
12254 	    }
12255 	} else {
12256 	    ret = NULL;
12257 	}
12258         xmlFreeDoc(ctxt->myDoc);
12259         ctxt->myDoc = NULL;
12260     }
12261     if (sax != NULL) ctxt->sax = NULL;
12262     xmlFreeParserCtxt(ctxt);
12263 
12264     return(ret);
12265 }
12266 
12267 
12268 /**
12269  * xmlParseDTD:
12270  * @ExternalID:  a NAME* containing the External ID of the DTD
12271  * @SystemID:  a NAME* containing the URL to the DTD
12272  *
12273  * Load and parse an external subset.
12274  *
12275  * Returns the resulting xmlDtdPtr or NULL in case of error.
12276  */
12277 
12278 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12279 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12280     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12281 }
12282 #endif /* LIBXML_VALID_ENABLED */
12283 
12284 /************************************************************************
12285  *									*
12286  * 		Front ends when parsing an Entity			*
12287  *									*
12288  ************************************************************************/
12289 
12290 /**
12291  * xmlParseCtxtExternalEntity:
12292  * @ctx:  the existing parsing context
12293  * @URL:  the URL for the entity to load
12294  * @ID:  the System ID for the entity to load
12295  * @lst:  the return value for the set of parsed nodes
12296  *
12297  * Parse an external general entity within an existing parsing context
12298  * An external general parsed entity is well-formed if it matches the
12299  * production labeled extParsedEnt.
12300  *
12301  * [78] extParsedEnt ::= TextDecl? content
12302  *
12303  * Returns 0 if the entity is well formed, -1 in case of args problem and
12304  *    the parser error code otherwise
12305  */
12306 
12307 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12308 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12309 	               const xmlChar *ID, xmlNodePtr *lst) {
12310     xmlParserCtxtPtr ctxt;
12311     xmlDocPtr newDoc;
12312     xmlNodePtr newRoot;
12313     xmlSAXHandlerPtr oldsax = NULL;
12314     int ret = 0;
12315     xmlChar start[4];
12316     xmlCharEncoding enc;
12317 
12318     if (ctx == NULL) return(-1);
12319 
12320     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12321         (ctx->depth > 1024)) {
12322 	return(XML_ERR_ENTITY_LOOP);
12323     }
12324 
12325     if (lst != NULL)
12326         *lst = NULL;
12327     if ((URL == NULL) && (ID == NULL))
12328 	return(-1);
12329     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12330 	return(-1);
12331 
12332     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12333     if (ctxt == NULL) {
12334 	return(-1);
12335     }
12336 
12337     oldsax = ctxt->sax;
12338     ctxt->sax = ctx->sax;
12339     xmlDetectSAX2(ctxt);
12340     newDoc = xmlNewDoc(BAD_CAST "1.0");
12341     if (newDoc == NULL) {
12342 	xmlFreeParserCtxt(ctxt);
12343 	return(-1);
12344     }
12345     newDoc->properties = XML_DOC_INTERNAL;
12346     if (ctx->myDoc->dict) {
12347 	newDoc->dict = ctx->myDoc->dict;
12348 	xmlDictReference(newDoc->dict);
12349     }
12350     if (ctx->myDoc != NULL) {
12351 	newDoc->intSubset = ctx->myDoc->intSubset;
12352 	newDoc->extSubset = ctx->myDoc->extSubset;
12353     }
12354     if (ctx->myDoc->URL != NULL) {
12355 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12356     }
12357     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12358     if (newRoot == NULL) {
12359 	ctxt->sax = oldsax;
12360 	xmlFreeParserCtxt(ctxt);
12361 	newDoc->intSubset = NULL;
12362 	newDoc->extSubset = NULL;
12363         xmlFreeDoc(newDoc);
12364 	return(-1);
12365     }
12366     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12367     nodePush(ctxt, newDoc->children);
12368     if (ctx->myDoc == NULL) {
12369 	ctxt->myDoc = newDoc;
12370     } else {
12371 	ctxt->myDoc = ctx->myDoc;
12372 	newDoc->children->doc = ctx->myDoc;
12373     }
12374 
12375     /*
12376      * Get the 4 first bytes and decode the charset
12377      * if enc != XML_CHAR_ENCODING_NONE
12378      * plug some encoding conversion routines.
12379      */
12380     GROW
12381     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12382 	start[0] = RAW;
12383 	start[1] = NXT(1);
12384 	start[2] = NXT(2);
12385 	start[3] = NXT(3);
12386 	enc = xmlDetectCharEncoding(start, 4);
12387 	if (enc != XML_CHAR_ENCODING_NONE) {
12388 	    xmlSwitchEncoding(ctxt, enc);
12389 	}
12390     }
12391 
12392     /*
12393      * Parse a possible text declaration first
12394      */
12395     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12396 	xmlParseTextDecl(ctxt);
12397 	/*
12398 	 * An XML-1.0 document can't reference an entity not XML-1.0
12399 	 */
12400 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12401 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12402 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12403 	                   "Version mismatch between document and entity\n");
12404 	}
12405     }
12406 
12407     /*
12408      * Doing validity checking on chunk doesn't make sense
12409      */
12410     ctxt->instate = XML_PARSER_CONTENT;
12411     ctxt->validate = ctx->validate;
12412     ctxt->valid = ctx->valid;
12413     ctxt->loadsubset = ctx->loadsubset;
12414     ctxt->depth = ctx->depth + 1;
12415     ctxt->replaceEntities = ctx->replaceEntities;
12416     if (ctxt->validate) {
12417 	ctxt->vctxt.error = ctx->vctxt.error;
12418 	ctxt->vctxt.warning = ctx->vctxt.warning;
12419     } else {
12420 	ctxt->vctxt.error = NULL;
12421 	ctxt->vctxt.warning = NULL;
12422     }
12423     ctxt->vctxt.nodeTab = NULL;
12424     ctxt->vctxt.nodeNr = 0;
12425     ctxt->vctxt.nodeMax = 0;
12426     ctxt->vctxt.node = NULL;
12427     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12428     ctxt->dict = ctx->dict;
12429     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12430     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12431     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12432     ctxt->dictNames = ctx->dictNames;
12433     ctxt->attsDefault = ctx->attsDefault;
12434     ctxt->attsSpecial = ctx->attsSpecial;
12435     ctxt->linenumbers = ctx->linenumbers;
12436 
12437     xmlParseContent(ctxt);
12438 
12439     ctx->validate = ctxt->validate;
12440     ctx->valid = ctxt->valid;
12441     if ((RAW == '<') && (NXT(1) == '/')) {
12442 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12443     } else if (RAW != 0) {
12444 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12445     }
12446     if (ctxt->node != newDoc->children) {
12447 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12448     }
12449 
12450     if (!ctxt->wellFormed) {
12451         if (ctxt->errNo == 0)
12452 	    ret = 1;
12453 	else
12454 	    ret = ctxt->errNo;
12455     } else {
12456 	if (lst != NULL) {
12457 	    xmlNodePtr cur;
12458 
12459 	    /*
12460 	     * Return the newly created nodeset after unlinking it from
12461 	     * they pseudo parent.
12462 	     */
12463 	    cur = newDoc->children->children;
12464 	    *lst = cur;
12465 	    while (cur != NULL) {
12466 		cur->parent = NULL;
12467 		cur = cur->next;
12468 	    }
12469             newDoc->children->children = NULL;
12470 	}
12471 	ret = 0;
12472     }
12473     ctxt->sax = oldsax;
12474     ctxt->dict = NULL;
12475     ctxt->attsDefault = NULL;
12476     ctxt->attsSpecial = NULL;
12477     xmlFreeParserCtxt(ctxt);
12478     newDoc->intSubset = NULL;
12479     newDoc->extSubset = NULL;
12480     xmlFreeDoc(newDoc);
12481 
12482     return(ret);
12483 }
12484 
12485 /**
12486  * xmlParseExternalEntityPrivate:
12487  * @doc:  the document the chunk pertains to
12488  * @oldctxt:  the previous parser context if available
12489  * @sax:  the SAX handler bloc (possibly NULL)
12490  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12491  * @depth:  Used for loop detection, use 0
12492  * @URL:  the URL for the entity to load
12493  * @ID:  the System ID for the entity to load
12494  * @list:  the return value for the set of parsed nodes
12495  *
12496  * Private version of xmlParseExternalEntity()
12497  *
12498  * Returns 0 if the entity is well formed, -1 in case of args problem and
12499  *    the parser error code otherwise
12500  */
12501 
12502 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12503 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12504 	              xmlSAXHandlerPtr sax,
12505 		      void *user_data, int depth, const xmlChar *URL,
12506 		      const xmlChar *ID, xmlNodePtr *list) {
12507     xmlParserCtxtPtr ctxt;
12508     xmlDocPtr newDoc;
12509     xmlNodePtr newRoot;
12510     xmlSAXHandlerPtr oldsax = NULL;
12511     xmlParserErrors ret = XML_ERR_OK;
12512     xmlChar start[4];
12513     xmlCharEncoding enc;
12514 
12515     if (((depth > 40) &&
12516 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12517 	(depth > 1024)) {
12518 	return(XML_ERR_ENTITY_LOOP);
12519     }
12520 
12521     if (list != NULL)
12522         *list = NULL;
12523     if ((URL == NULL) && (ID == NULL))
12524 	return(XML_ERR_INTERNAL_ERROR);
12525     if (doc == NULL)
12526 	return(XML_ERR_INTERNAL_ERROR);
12527 
12528 
12529     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12530     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12531     ctxt->userData = ctxt;
12532     if (oldctxt != NULL) {
12533 	ctxt->_private = oldctxt->_private;
12534 	ctxt->loadsubset = oldctxt->loadsubset;
12535 	ctxt->validate = oldctxt->validate;
12536 	ctxt->external = oldctxt->external;
12537 	ctxt->record_info = oldctxt->record_info;
12538 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12539 	ctxt->node_seq.length = oldctxt->node_seq.length;
12540 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12541     } else {
12542 	/*
12543 	 * Doing validity checking on chunk without context
12544 	 * doesn't make sense
12545 	 */
12546 	ctxt->_private = NULL;
12547 	ctxt->validate = 0;
12548 	ctxt->external = 2;
12549 	ctxt->loadsubset = 0;
12550     }
12551     if (sax != NULL) {
12552 	oldsax = ctxt->sax;
12553         ctxt->sax = sax;
12554 	if (user_data != NULL)
12555 	    ctxt->userData = user_data;
12556     }
12557     xmlDetectSAX2(ctxt);
12558     newDoc = xmlNewDoc(BAD_CAST "1.0");
12559     if (newDoc == NULL) {
12560 	ctxt->node_seq.maximum = 0;
12561 	ctxt->node_seq.length = 0;
12562 	ctxt->node_seq.buffer = NULL;
12563 	xmlFreeParserCtxt(ctxt);
12564 	return(XML_ERR_INTERNAL_ERROR);
12565     }
12566     newDoc->properties = XML_DOC_INTERNAL;
12567     newDoc->intSubset = doc->intSubset;
12568     newDoc->extSubset = doc->extSubset;
12569     newDoc->dict = doc->dict;
12570     xmlDictReference(newDoc->dict);
12571 
12572     if (doc->URL != NULL) {
12573 	newDoc->URL = xmlStrdup(doc->URL);
12574     }
12575     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12576     if (newRoot == NULL) {
12577 	if (sax != NULL)
12578 	    ctxt->sax = oldsax;
12579 	ctxt->node_seq.maximum = 0;
12580 	ctxt->node_seq.length = 0;
12581 	ctxt->node_seq.buffer = NULL;
12582 	xmlFreeParserCtxt(ctxt);
12583 	newDoc->intSubset = NULL;
12584 	newDoc->extSubset = NULL;
12585         xmlFreeDoc(newDoc);
12586 	return(XML_ERR_INTERNAL_ERROR);
12587     }
12588     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12589     nodePush(ctxt, newDoc->children);
12590     ctxt->myDoc = doc;
12591     newRoot->doc = doc;
12592 
12593     /*
12594      * Get the 4 first bytes and decode the charset
12595      * if enc != XML_CHAR_ENCODING_NONE
12596      * plug some encoding conversion routines.
12597      */
12598     GROW;
12599     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12600 	start[0] = RAW;
12601 	start[1] = NXT(1);
12602 	start[2] = NXT(2);
12603 	start[3] = NXT(3);
12604 	enc = xmlDetectCharEncoding(start, 4);
12605 	if (enc != XML_CHAR_ENCODING_NONE) {
12606 	    xmlSwitchEncoding(ctxt, enc);
12607 	}
12608     }
12609 
12610     /*
12611      * Parse a possible text declaration first
12612      */
12613     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12614 	xmlParseTextDecl(ctxt);
12615     }
12616 
12617     ctxt->instate = XML_PARSER_CONTENT;
12618     ctxt->depth = depth;
12619 
12620     xmlParseContent(ctxt);
12621 
12622     if ((RAW == '<') && (NXT(1) == '/')) {
12623 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12624     } else if (RAW != 0) {
12625 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12626     }
12627     if (ctxt->node != newDoc->children) {
12628 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12629     }
12630 
12631     if (!ctxt->wellFormed) {
12632         if (ctxt->errNo == 0)
12633 	    ret = XML_ERR_INTERNAL_ERROR;
12634 	else
12635 	    ret = (xmlParserErrors)ctxt->errNo;
12636     } else {
12637 	if (list != NULL) {
12638 	    xmlNodePtr cur;
12639 
12640 	    /*
12641 	     * Return the newly created nodeset after unlinking it from
12642 	     * they pseudo parent.
12643 	     */
12644 	    cur = newDoc->children->children;
12645 	    *list = cur;
12646 	    while (cur != NULL) {
12647 		cur->parent = NULL;
12648 		cur = cur->next;
12649 	    }
12650             newDoc->children->children = NULL;
12651 	}
12652 	ret = XML_ERR_OK;
12653     }
12654 
12655     /*
12656      * Record in the parent context the number of entities replacement
12657      * done when parsing that reference.
12658      */
12659     if (oldctxt != NULL)
12660         oldctxt->nbentities += ctxt->nbentities;
12661 
12662     /*
12663      * Also record the size of the entity parsed
12664      */
12665     if (ctxt->input != NULL) {
12666 	oldctxt->sizeentities += ctxt->input->consumed;
12667 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12668     }
12669     /*
12670      * And record the last error if any
12671      */
12672     if (ctxt->lastError.code != XML_ERR_OK)
12673         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12674 
12675     if (sax != NULL)
12676 	ctxt->sax = oldsax;
12677     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12678     oldctxt->node_seq.length = ctxt->node_seq.length;
12679     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12680     ctxt->node_seq.maximum = 0;
12681     ctxt->node_seq.length = 0;
12682     ctxt->node_seq.buffer = NULL;
12683     xmlFreeParserCtxt(ctxt);
12684     newDoc->intSubset = NULL;
12685     newDoc->extSubset = NULL;
12686     xmlFreeDoc(newDoc);
12687 
12688     return(ret);
12689 }
12690 
12691 #ifdef LIBXML_SAX1_ENABLED
12692 /**
12693  * xmlParseExternalEntity:
12694  * @doc:  the document the chunk pertains to
12695  * @sax:  the SAX handler bloc (possibly NULL)
12696  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12697  * @depth:  Used for loop detection, use 0
12698  * @URL:  the URL for the entity to load
12699  * @ID:  the System ID for the entity to load
12700  * @lst:  the return value for the set of parsed nodes
12701  *
12702  * Parse an external general entity
12703  * An external general parsed entity is well-formed if it matches the
12704  * production labeled extParsedEnt.
12705  *
12706  * [78] extParsedEnt ::= TextDecl? content
12707  *
12708  * Returns 0 if the entity is well formed, -1 in case of args problem and
12709  *    the parser error code otherwise
12710  */
12711 
12712 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12713 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12714 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12715     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12716 		                       ID, lst));
12717 }
12718 
12719 /**
12720  * xmlParseBalancedChunkMemory:
12721  * @doc:  the document the chunk pertains to
12722  * @sax:  the SAX handler bloc (possibly NULL)
12723  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12724  * @depth:  Used for loop detection, use 0
12725  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12726  * @lst:  the return value for the set of parsed nodes
12727  *
12728  * Parse a well-balanced chunk of an XML document
12729  * called by the parser
12730  * The allowed sequence for the Well Balanced Chunk is the one defined by
12731  * the content production in the XML grammar:
12732  *
12733  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12734  *
12735  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12736  *    the parser error code otherwise
12737  */
12738 
12739 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12740 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12741      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12742     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12743                                                 depth, string, lst, 0 );
12744 }
12745 #endif /* LIBXML_SAX1_ENABLED */
12746 
12747 /**
12748  * xmlParseBalancedChunkMemoryInternal:
12749  * @oldctxt:  the existing parsing context
12750  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12751  * @user_data:  the user data field for the parser context
12752  * @lst:  the return value for the set of parsed nodes
12753  *
12754  *
12755  * Parse a well-balanced chunk of an XML document
12756  * called by the parser
12757  * The allowed sequence for the Well Balanced Chunk is the one defined by
12758  * the content production in the XML grammar:
12759  *
12760  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12761  *
12762  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12763  * error code otherwise
12764  *
12765  * In case recover is set to 1, the nodelist will not be empty even if
12766  * the parsed chunk is not well balanced.
12767  */
12768 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12769 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12770 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12771     xmlParserCtxtPtr ctxt;
12772     xmlDocPtr newDoc = NULL;
12773     xmlNodePtr newRoot;
12774     xmlSAXHandlerPtr oldsax = NULL;
12775     xmlNodePtr content = NULL;
12776     xmlNodePtr last = NULL;
12777     int size;
12778     xmlParserErrors ret = XML_ERR_OK;
12779 #ifdef SAX2
12780     int i;
12781 #endif
12782 
12783     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12784         (oldctxt->depth >  1024)) {
12785 	return(XML_ERR_ENTITY_LOOP);
12786     }
12787 
12788 
12789     if (lst != NULL)
12790         *lst = NULL;
12791     if (string == NULL)
12792         return(XML_ERR_INTERNAL_ERROR);
12793 
12794     size = xmlStrlen(string);
12795 
12796     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12797     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12798     if (user_data != NULL)
12799 	ctxt->userData = user_data;
12800     else
12801 	ctxt->userData = ctxt;
12802     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12803     ctxt->dict = oldctxt->dict;
12804     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12805     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12806     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12807 
12808 #ifdef SAX2
12809     /* propagate namespaces down the entity */
12810     for (i = 0;i < oldctxt->nsNr;i += 2) {
12811         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12812     }
12813 #endif
12814 
12815     oldsax = ctxt->sax;
12816     ctxt->sax = oldctxt->sax;
12817     xmlDetectSAX2(ctxt);
12818     ctxt->replaceEntities = oldctxt->replaceEntities;
12819     ctxt->options = oldctxt->options;
12820 
12821     ctxt->_private = oldctxt->_private;
12822     if (oldctxt->myDoc == NULL) {
12823 	newDoc = xmlNewDoc(BAD_CAST "1.0");
12824 	if (newDoc == NULL) {
12825 	    ctxt->sax = oldsax;
12826 	    ctxt->dict = NULL;
12827 	    xmlFreeParserCtxt(ctxt);
12828 	    return(XML_ERR_INTERNAL_ERROR);
12829 	}
12830 	newDoc->properties = XML_DOC_INTERNAL;
12831 	newDoc->dict = ctxt->dict;
12832 	xmlDictReference(newDoc->dict);
12833 	ctxt->myDoc = newDoc;
12834     } else {
12835 	ctxt->myDoc = oldctxt->myDoc;
12836         content = ctxt->myDoc->children;
12837 	last = ctxt->myDoc->last;
12838     }
12839     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12840     if (newRoot == NULL) {
12841 	ctxt->sax = oldsax;
12842 	ctxt->dict = NULL;
12843 	xmlFreeParserCtxt(ctxt);
12844 	if (newDoc != NULL) {
12845 	    xmlFreeDoc(newDoc);
12846 	}
12847 	return(XML_ERR_INTERNAL_ERROR);
12848     }
12849     ctxt->myDoc->children = NULL;
12850     ctxt->myDoc->last = NULL;
12851     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12852     nodePush(ctxt, ctxt->myDoc->children);
12853     ctxt->instate = XML_PARSER_CONTENT;
12854     ctxt->depth = oldctxt->depth + 1;
12855 
12856     ctxt->validate = 0;
12857     ctxt->loadsubset = oldctxt->loadsubset;
12858     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12859 	/*
12860 	 * ID/IDREF registration will be done in xmlValidateElement below
12861 	 */
12862 	ctxt->loadsubset |= XML_SKIP_IDS;
12863     }
12864     ctxt->dictNames = oldctxt->dictNames;
12865     ctxt->attsDefault = oldctxt->attsDefault;
12866     ctxt->attsSpecial = oldctxt->attsSpecial;
12867 
12868     xmlParseContent(ctxt);
12869     if ((RAW == '<') && (NXT(1) == '/')) {
12870 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12871     } else if (RAW != 0) {
12872 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12873     }
12874     if (ctxt->node != ctxt->myDoc->children) {
12875 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12876     }
12877 
12878     if (!ctxt->wellFormed) {
12879         if (ctxt->errNo == 0)
12880 	    ret = XML_ERR_INTERNAL_ERROR;
12881 	else
12882 	    ret = (xmlParserErrors)ctxt->errNo;
12883     } else {
12884       ret = XML_ERR_OK;
12885     }
12886 
12887     if ((lst != NULL) && (ret == XML_ERR_OK)) {
12888 	xmlNodePtr cur;
12889 
12890 	/*
12891 	 * Return the newly created nodeset after unlinking it from
12892 	 * they pseudo parent.
12893 	 */
12894 	cur = ctxt->myDoc->children->children;
12895 	*lst = cur;
12896 	while (cur != NULL) {
12897 #ifdef LIBXML_VALID_ENABLED
12898 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12899 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12900 		(cur->type == XML_ELEMENT_NODE)) {
12901 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12902 			oldctxt->myDoc, cur);
12903 	    }
12904 #endif /* LIBXML_VALID_ENABLED */
12905 	    cur->parent = NULL;
12906 	    cur = cur->next;
12907 	}
12908 	ctxt->myDoc->children->children = NULL;
12909     }
12910     if (ctxt->myDoc != NULL) {
12911 	xmlFreeNode(ctxt->myDoc->children);
12912         ctxt->myDoc->children = content;
12913         ctxt->myDoc->last = last;
12914     }
12915 
12916     /*
12917      * Record in the parent context the number of entities replacement
12918      * done when parsing that reference.
12919      */
12920     if (oldctxt != NULL)
12921         oldctxt->nbentities += ctxt->nbentities;
12922 
12923     /*
12924      * Also record the last error if any
12925      */
12926     if (ctxt->lastError.code != XML_ERR_OK)
12927         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12928 
12929     ctxt->sax = oldsax;
12930     ctxt->dict = NULL;
12931     ctxt->attsDefault = NULL;
12932     ctxt->attsSpecial = NULL;
12933     xmlFreeParserCtxt(ctxt);
12934     if (newDoc != NULL) {
12935 	xmlFreeDoc(newDoc);
12936     }
12937 
12938     return(ret);
12939 }
12940 
12941 /**
12942  * xmlParseInNodeContext:
12943  * @node:  the context node
12944  * @data:  the input string
12945  * @datalen:  the input string length in bytes
12946  * @options:  a combination of xmlParserOption
12947  * @lst:  the return value for the set of parsed nodes
12948  *
12949  * Parse a well-balanced chunk of an XML document
12950  * within the context (DTD, namespaces, etc ...) of the given node.
12951  *
12952  * The allowed sequence for the data is a Well Balanced Chunk defined by
12953  * the content production in the XML grammar:
12954  *
12955  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12956  *
12957  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12958  * error code otherwise
12959  */
12960 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12961 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12962                       int options, xmlNodePtr *lst) {
12963 #ifdef SAX2
12964     xmlParserCtxtPtr ctxt;
12965     xmlDocPtr doc = NULL;
12966     xmlNodePtr fake, cur;
12967     int nsnr = 0;
12968 
12969     xmlParserErrors ret = XML_ERR_OK;
12970 
12971     /*
12972      * check all input parameters, grab the document
12973      */
12974     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12975         return(XML_ERR_INTERNAL_ERROR);
12976     switch (node->type) {
12977         case XML_ELEMENT_NODE:
12978         case XML_ATTRIBUTE_NODE:
12979         case XML_TEXT_NODE:
12980         case XML_CDATA_SECTION_NODE:
12981         case XML_ENTITY_REF_NODE:
12982         case XML_PI_NODE:
12983         case XML_COMMENT_NODE:
12984         case XML_DOCUMENT_NODE:
12985         case XML_HTML_DOCUMENT_NODE:
12986 	    break;
12987 	default:
12988 	    return(XML_ERR_INTERNAL_ERROR);
12989 
12990     }
12991     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12992            (node->type != XML_DOCUMENT_NODE) &&
12993 	   (node->type != XML_HTML_DOCUMENT_NODE))
12994 	node = node->parent;
12995     if (node == NULL)
12996 	return(XML_ERR_INTERNAL_ERROR);
12997     if (node->type == XML_ELEMENT_NODE)
12998 	doc = node->doc;
12999     else
13000         doc = (xmlDocPtr) node;
13001     if (doc == NULL)
13002 	return(XML_ERR_INTERNAL_ERROR);
13003 
13004     /*
13005      * allocate a context and set-up everything not related to the
13006      * node position in the tree
13007      */
13008     if (doc->type == XML_DOCUMENT_NODE)
13009 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13010 #ifdef LIBXML_HTML_ENABLED
13011     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13012 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13013         /*
13014          * When parsing in context, it makes no sense to add implied
13015          * elements like html/body/etc...
13016          */
13017         options |= HTML_PARSE_NOIMPLIED;
13018     }
13019 #endif
13020     else
13021         return(XML_ERR_INTERNAL_ERROR);
13022 
13023     if (ctxt == NULL)
13024         return(XML_ERR_NO_MEMORY);
13025 
13026     /*
13027      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13028      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13029      * we must wait until the last moment to free the original one.
13030      */
13031     if (doc->dict != NULL) {
13032         if (ctxt->dict != NULL)
13033 	    xmlDictFree(ctxt->dict);
13034 	ctxt->dict = doc->dict;
13035     } else
13036         options |= XML_PARSE_NODICT;
13037 
13038     if (doc->encoding != NULL) {
13039         xmlCharEncodingHandlerPtr hdlr;
13040 
13041         if (ctxt->encoding != NULL)
13042 	    xmlFree((xmlChar *) ctxt->encoding);
13043         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13044 
13045         hdlr = xmlFindCharEncodingHandler(doc->encoding);
13046         if (hdlr != NULL) {
13047             xmlSwitchToEncoding(ctxt, hdlr);
13048 	} else {
13049             return(XML_ERR_UNSUPPORTED_ENCODING);
13050         }
13051     }
13052 
13053     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13054     xmlDetectSAX2(ctxt);
13055     ctxt->myDoc = doc;
13056 
13057     fake = xmlNewComment(NULL);
13058     if (fake == NULL) {
13059         xmlFreeParserCtxt(ctxt);
13060 	return(XML_ERR_NO_MEMORY);
13061     }
13062     xmlAddChild(node, fake);
13063 
13064     if (node->type == XML_ELEMENT_NODE) {
13065 	nodePush(ctxt, node);
13066 	/*
13067 	 * initialize the SAX2 namespaces stack
13068 	 */
13069 	cur = node;
13070 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13071 	    xmlNsPtr ns = cur->nsDef;
13072 	    const xmlChar *iprefix, *ihref;
13073 
13074 	    while (ns != NULL) {
13075 		if (ctxt->dict) {
13076 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13077 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13078 		} else {
13079 		    iprefix = ns->prefix;
13080 		    ihref = ns->href;
13081 		}
13082 
13083 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13084 		    nsPush(ctxt, iprefix, ihref);
13085 		    nsnr++;
13086 		}
13087 		ns = ns->next;
13088 	    }
13089 	    cur = cur->parent;
13090 	}
13091 	ctxt->instate = XML_PARSER_CONTENT;
13092     }
13093 
13094     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13095 	/*
13096 	 * ID/IDREF registration will be done in xmlValidateElement below
13097 	 */
13098 	ctxt->loadsubset |= XML_SKIP_IDS;
13099     }
13100 
13101 #ifdef LIBXML_HTML_ENABLED
13102     if (doc->type == XML_HTML_DOCUMENT_NODE)
13103         __htmlParseContent(ctxt);
13104     else
13105 #endif
13106 	xmlParseContent(ctxt);
13107 
13108     nsPop(ctxt, nsnr);
13109     if ((RAW == '<') && (NXT(1) == '/')) {
13110 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13111     } else if (RAW != 0) {
13112 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13113     }
13114     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13115 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13116 	ctxt->wellFormed = 0;
13117     }
13118 
13119     if (!ctxt->wellFormed) {
13120         if (ctxt->errNo == 0)
13121 	    ret = XML_ERR_INTERNAL_ERROR;
13122 	else
13123 	    ret = (xmlParserErrors)ctxt->errNo;
13124     } else {
13125         ret = XML_ERR_OK;
13126     }
13127 
13128     /*
13129      * Return the newly created nodeset after unlinking it from
13130      * the pseudo sibling.
13131      */
13132 
13133     cur = fake->next;
13134     fake->next = NULL;
13135     node->last = fake;
13136 
13137     if (cur != NULL) {
13138 	cur->prev = NULL;
13139     }
13140 
13141     *lst = cur;
13142 
13143     while (cur != NULL) {
13144 	cur->parent = NULL;
13145 	cur = cur->next;
13146     }
13147 
13148     xmlUnlinkNode(fake);
13149     xmlFreeNode(fake);
13150 
13151 
13152     if (ret != XML_ERR_OK) {
13153         xmlFreeNodeList(*lst);
13154 	*lst = NULL;
13155     }
13156 
13157     if (doc->dict != NULL)
13158         ctxt->dict = NULL;
13159     xmlFreeParserCtxt(ctxt);
13160 
13161     return(ret);
13162 #else /* !SAX2 */
13163     return(XML_ERR_INTERNAL_ERROR);
13164 #endif
13165 }
13166 
13167 #ifdef LIBXML_SAX1_ENABLED
13168 /**
13169  * xmlParseBalancedChunkMemoryRecover:
13170  * @doc:  the document the chunk pertains to
13171  * @sax:  the SAX handler bloc (possibly NULL)
13172  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13173  * @depth:  Used for loop detection, use 0
13174  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13175  * @lst:  the return value for the set of parsed nodes
13176  * @recover: return nodes even if the data is broken (use 0)
13177  *
13178  *
13179  * Parse a well-balanced chunk of an XML document
13180  * called by the parser
13181  * The allowed sequence for the Well Balanced Chunk is the one defined by
13182  * the content production in the XML grammar:
13183  *
13184  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13185  *
13186  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13187  *    the parser error code otherwise
13188  *
13189  * In case recover is set to 1, the nodelist will not be empty even if
13190  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13191  * some extent.
13192  */
13193 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13194 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13195      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13196      int recover) {
13197     xmlParserCtxtPtr ctxt;
13198     xmlDocPtr newDoc;
13199     xmlSAXHandlerPtr oldsax = NULL;
13200     xmlNodePtr content, newRoot;
13201     int size;
13202     int ret = 0;
13203 
13204     if (depth > 40) {
13205 	return(XML_ERR_ENTITY_LOOP);
13206     }
13207 
13208 
13209     if (lst != NULL)
13210         *lst = NULL;
13211     if (string == NULL)
13212         return(-1);
13213 
13214     size = xmlStrlen(string);
13215 
13216     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13217     if (ctxt == NULL) return(-1);
13218     ctxt->userData = ctxt;
13219     if (sax != NULL) {
13220 	oldsax = ctxt->sax;
13221         ctxt->sax = sax;
13222 	if (user_data != NULL)
13223 	    ctxt->userData = user_data;
13224     }
13225     newDoc = xmlNewDoc(BAD_CAST "1.0");
13226     if (newDoc == NULL) {
13227 	xmlFreeParserCtxt(ctxt);
13228 	return(-1);
13229     }
13230     newDoc->properties = XML_DOC_INTERNAL;
13231     if ((doc != NULL) && (doc->dict != NULL)) {
13232         xmlDictFree(ctxt->dict);
13233 	ctxt->dict = doc->dict;
13234 	xmlDictReference(ctxt->dict);
13235 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13236 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13237 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13238 	ctxt->dictNames = 1;
13239     } else {
13240 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13241     }
13242     if (doc != NULL) {
13243 	newDoc->intSubset = doc->intSubset;
13244 	newDoc->extSubset = doc->extSubset;
13245     }
13246     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13247     if (newRoot == NULL) {
13248 	if (sax != NULL)
13249 	    ctxt->sax = oldsax;
13250 	xmlFreeParserCtxt(ctxt);
13251 	newDoc->intSubset = NULL;
13252 	newDoc->extSubset = NULL;
13253         xmlFreeDoc(newDoc);
13254 	return(-1);
13255     }
13256     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13257     nodePush(ctxt, newRoot);
13258     if (doc == NULL) {
13259 	ctxt->myDoc = newDoc;
13260     } else {
13261 	ctxt->myDoc = newDoc;
13262 	newDoc->children->doc = doc;
13263 	/* Ensure that doc has XML spec namespace */
13264 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13265 	newDoc->oldNs = doc->oldNs;
13266     }
13267     ctxt->instate = XML_PARSER_CONTENT;
13268     ctxt->depth = depth;
13269 
13270     /*
13271      * Doing validity checking on chunk doesn't make sense
13272      */
13273     ctxt->validate = 0;
13274     ctxt->loadsubset = 0;
13275     xmlDetectSAX2(ctxt);
13276 
13277     if ( doc != NULL ){
13278         content = doc->children;
13279         doc->children = NULL;
13280         xmlParseContent(ctxt);
13281         doc->children = content;
13282     }
13283     else {
13284         xmlParseContent(ctxt);
13285     }
13286     if ((RAW == '<') && (NXT(1) == '/')) {
13287 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13288     } else if (RAW != 0) {
13289 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13290     }
13291     if (ctxt->node != newDoc->children) {
13292 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13293     }
13294 
13295     if (!ctxt->wellFormed) {
13296         if (ctxt->errNo == 0)
13297 	    ret = 1;
13298 	else
13299 	    ret = ctxt->errNo;
13300     } else {
13301       ret = 0;
13302     }
13303 
13304     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13305 	xmlNodePtr cur;
13306 
13307 	/*
13308 	 * Return the newly created nodeset after unlinking it from
13309 	 * they pseudo parent.
13310 	 */
13311 	cur = newDoc->children->children;
13312 	*lst = cur;
13313 	while (cur != NULL) {
13314 	    xmlSetTreeDoc(cur, doc);
13315 	    cur->parent = NULL;
13316 	    cur = cur->next;
13317 	}
13318 	newDoc->children->children = NULL;
13319     }
13320 
13321     if (sax != NULL)
13322 	ctxt->sax = oldsax;
13323     xmlFreeParserCtxt(ctxt);
13324     newDoc->intSubset = NULL;
13325     newDoc->extSubset = NULL;
13326     newDoc->oldNs = NULL;
13327     xmlFreeDoc(newDoc);
13328 
13329     return(ret);
13330 }
13331 
13332 /**
13333  * xmlSAXParseEntity:
13334  * @sax:  the SAX handler block
13335  * @filename:  the filename
13336  *
13337  * parse an XML external entity out of context and build a tree.
13338  * It use the given SAX function block to handle the parsing callback.
13339  * If sax is NULL, fallback to the default DOM tree building routines.
13340  *
13341  * [78] extParsedEnt ::= TextDecl? content
13342  *
13343  * This correspond to a "Well Balanced" chunk
13344  *
13345  * Returns the resulting document tree
13346  */
13347 
13348 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13349 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13350     xmlDocPtr ret;
13351     xmlParserCtxtPtr ctxt;
13352 
13353     ctxt = xmlCreateFileParserCtxt(filename);
13354     if (ctxt == NULL) {
13355 	return(NULL);
13356     }
13357     if (sax != NULL) {
13358 	if (ctxt->sax != NULL)
13359 	    xmlFree(ctxt->sax);
13360         ctxt->sax = sax;
13361         ctxt->userData = NULL;
13362     }
13363 
13364     xmlParseExtParsedEnt(ctxt);
13365 
13366     if (ctxt->wellFormed)
13367 	ret = ctxt->myDoc;
13368     else {
13369         ret = NULL;
13370         xmlFreeDoc(ctxt->myDoc);
13371         ctxt->myDoc = NULL;
13372     }
13373     if (sax != NULL)
13374         ctxt->sax = NULL;
13375     xmlFreeParserCtxt(ctxt);
13376 
13377     return(ret);
13378 }
13379 
13380 /**
13381  * xmlParseEntity:
13382  * @filename:  the filename
13383  *
13384  * parse an XML external entity out of context and build a tree.
13385  *
13386  * [78] extParsedEnt ::= TextDecl? content
13387  *
13388  * This correspond to a "Well Balanced" chunk
13389  *
13390  * Returns the resulting document tree
13391  */
13392 
13393 xmlDocPtr
xmlParseEntity(const char * filename)13394 xmlParseEntity(const char *filename) {
13395     return(xmlSAXParseEntity(NULL, filename));
13396 }
13397 #endif /* LIBXML_SAX1_ENABLED */
13398 
13399 /**
13400  * xmlCreateEntityParserCtxtInternal:
13401  * @URL:  the entity URL
13402  * @ID:  the entity PUBLIC ID
13403  * @base:  a possible base for the target URI
13404  * @pctx:  parser context used to set options on new context
13405  *
13406  * Create a parser context for an external entity
13407  * Automatic support for ZLIB/Compress compressed document is provided
13408  * by default if found at compile-time.
13409  *
13410  * Returns the new parser context or NULL
13411  */
13412 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13413 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13414 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13415     xmlParserCtxtPtr ctxt;
13416     xmlParserInputPtr inputStream;
13417     char *directory = NULL;
13418     xmlChar *uri;
13419 
13420     ctxt = xmlNewParserCtxt();
13421     if (ctxt == NULL) {
13422 	return(NULL);
13423     }
13424 
13425     if (pctx != NULL) {
13426         ctxt->options = pctx->options;
13427         ctxt->_private = pctx->_private;
13428     }
13429 
13430     uri = xmlBuildURI(URL, base);
13431 
13432     if (uri == NULL) {
13433 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13434 	if (inputStream == NULL) {
13435 	    xmlFreeParserCtxt(ctxt);
13436 	    return(NULL);
13437 	}
13438 
13439 	inputPush(ctxt, inputStream);
13440 
13441 	if ((ctxt->directory == NULL) && (directory == NULL))
13442 	    directory = xmlParserGetDirectory((char *)URL);
13443 	if ((ctxt->directory == NULL) && (directory != NULL))
13444 	    ctxt->directory = directory;
13445     } else {
13446 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13447 	if (inputStream == NULL) {
13448 	    xmlFree(uri);
13449 	    xmlFreeParserCtxt(ctxt);
13450 	    return(NULL);
13451 	}
13452 
13453 	inputPush(ctxt, inputStream);
13454 
13455 	if ((ctxt->directory == NULL) && (directory == NULL))
13456 	    directory = xmlParserGetDirectory((char *)uri);
13457 	if ((ctxt->directory == NULL) && (directory != NULL))
13458 	    ctxt->directory = directory;
13459 	xmlFree(uri);
13460     }
13461     return(ctxt);
13462 }
13463 
13464 /**
13465  * xmlCreateEntityParserCtxt:
13466  * @URL:  the entity URL
13467  * @ID:  the entity PUBLIC ID
13468  * @base:  a possible base for the target URI
13469  *
13470  * Create a parser context for an external entity
13471  * Automatic support for ZLIB/Compress compressed document is provided
13472  * by default if found at compile-time.
13473  *
13474  * Returns the new parser context or NULL
13475  */
13476 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13477 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13478 	                  const xmlChar *base) {
13479     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13480 
13481 }
13482 
13483 /************************************************************************
13484  *									*
13485  *		Front ends when parsing from a file			*
13486  *									*
13487  ************************************************************************/
13488 
13489 /**
13490  * xmlCreateURLParserCtxt:
13491  * @filename:  the filename or URL
13492  * @options:  a combination of xmlParserOption
13493  *
13494  * Create a parser context for a file or URL content.
13495  * Automatic support for ZLIB/Compress compressed document is provided
13496  * by default if found at compile-time and for file accesses
13497  *
13498  * Returns the new parser context or NULL
13499  */
13500 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13501 xmlCreateURLParserCtxt(const char *filename, int options)
13502 {
13503     xmlParserCtxtPtr ctxt;
13504     xmlParserInputPtr inputStream;
13505     char *directory = NULL;
13506 
13507     ctxt = xmlNewParserCtxt();
13508     if (ctxt == NULL) {
13509 	xmlErrMemory(NULL, "cannot allocate parser context");
13510 	return(NULL);
13511     }
13512 
13513     if (options)
13514 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13515     ctxt->linenumbers = 1;
13516 
13517     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13518     if (inputStream == NULL) {
13519 	xmlFreeParserCtxt(ctxt);
13520 	return(NULL);
13521     }
13522 
13523     inputPush(ctxt, inputStream);
13524     if ((ctxt->directory == NULL) && (directory == NULL))
13525         directory = xmlParserGetDirectory(filename);
13526     if ((ctxt->directory == NULL) && (directory != NULL))
13527         ctxt->directory = directory;
13528 
13529     return(ctxt);
13530 }
13531 
13532 /**
13533  * xmlCreateFileParserCtxt:
13534  * @filename:  the filename
13535  *
13536  * Create a parser context for a file content.
13537  * Automatic support for ZLIB/Compress compressed document is provided
13538  * by default if found at compile-time.
13539  *
13540  * Returns the new parser context or NULL
13541  */
13542 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13543 xmlCreateFileParserCtxt(const char *filename)
13544 {
13545     return(xmlCreateURLParserCtxt(filename, 0));
13546 }
13547 
13548 #ifdef LIBXML_SAX1_ENABLED
13549 /**
13550  * xmlSAXParseFileWithData:
13551  * @sax:  the SAX handler block
13552  * @filename:  the filename
13553  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13554  *             documents
13555  * @data:  the userdata
13556  *
13557  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13558  * compressed document is provided by default if found at compile-time.
13559  * It use the given SAX function block to handle the parsing callback.
13560  * If sax is NULL, fallback to the default DOM tree building routines.
13561  *
13562  * User data (void *) is stored within the parser context in the
13563  * context's _private member, so it is available nearly everywhere in libxml
13564  *
13565  * Returns the resulting document tree
13566  */
13567 
13568 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13569 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13570                         int recovery, void *data) {
13571     xmlDocPtr ret;
13572     xmlParserCtxtPtr ctxt;
13573 
13574     xmlInitParser();
13575 
13576     ctxt = xmlCreateFileParserCtxt(filename);
13577     if (ctxt == NULL) {
13578 	return(NULL);
13579     }
13580     if (sax != NULL) {
13581 	if (ctxt->sax != NULL)
13582 	    xmlFree(ctxt->sax);
13583         ctxt->sax = sax;
13584     }
13585     xmlDetectSAX2(ctxt);
13586     if (data!=NULL) {
13587 	ctxt->_private = data;
13588     }
13589 
13590     if (ctxt->directory == NULL)
13591         ctxt->directory = xmlParserGetDirectory(filename);
13592 
13593     ctxt->recovery = recovery;
13594 
13595     xmlParseDocument(ctxt);
13596 
13597     if ((ctxt->wellFormed) || recovery) {
13598         ret = ctxt->myDoc;
13599 	if (ret != NULL) {
13600 	    if (ctxt->input->buf->compressed > 0)
13601 		ret->compression = 9;
13602 	    else
13603 		ret->compression = ctxt->input->buf->compressed;
13604 	}
13605     }
13606     else {
13607        ret = NULL;
13608        xmlFreeDoc(ctxt->myDoc);
13609        ctxt->myDoc = NULL;
13610     }
13611     if (sax != NULL)
13612         ctxt->sax = NULL;
13613     xmlFreeParserCtxt(ctxt);
13614 
13615     return(ret);
13616 }
13617 
13618 /**
13619  * xmlSAXParseFile:
13620  * @sax:  the SAX handler block
13621  * @filename:  the filename
13622  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13623  *             documents
13624  *
13625  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13626  * compressed document is provided by default if found at compile-time.
13627  * It use the given SAX function block to handle the parsing callback.
13628  * If sax is NULL, fallback to the default DOM tree building routines.
13629  *
13630  * Returns the resulting document tree
13631  */
13632 
13633 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13634 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13635                           int recovery) {
13636     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13637 }
13638 
13639 /**
13640  * xmlRecoverDoc:
13641  * @cur:  a pointer to an array of xmlChar
13642  *
13643  * parse an XML in-memory document and build a tree.
13644  * In the case the document is not Well Formed, a attempt to build a
13645  * tree is tried anyway
13646  *
13647  * Returns the resulting document tree or NULL in case of failure
13648  */
13649 
13650 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13651 xmlRecoverDoc(const xmlChar *cur) {
13652     return(xmlSAXParseDoc(NULL, cur, 1));
13653 }
13654 
13655 /**
13656  * xmlParseFile:
13657  * @filename:  the filename
13658  *
13659  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13660  * compressed document is provided by default if found at compile-time.
13661  *
13662  * Returns the resulting document tree if the file was wellformed,
13663  * NULL otherwise.
13664  */
13665 
13666 xmlDocPtr
xmlParseFile(const char * filename)13667 xmlParseFile(const char *filename) {
13668     return(xmlSAXParseFile(NULL, filename, 0));
13669 }
13670 
13671 /**
13672  * xmlRecoverFile:
13673  * @filename:  the filename
13674  *
13675  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13676  * compressed document is provided by default if found at compile-time.
13677  * In the case the document is not Well Formed, it attempts to build
13678  * a tree anyway
13679  *
13680  * Returns the resulting document tree or NULL in case of failure
13681  */
13682 
13683 xmlDocPtr
xmlRecoverFile(const char * filename)13684 xmlRecoverFile(const char *filename) {
13685     return(xmlSAXParseFile(NULL, filename, 1));
13686 }
13687 
13688 
13689 /**
13690  * xmlSetupParserForBuffer:
13691  * @ctxt:  an XML parser context
13692  * @buffer:  a xmlChar * buffer
13693  * @filename:  a file name
13694  *
13695  * Setup the parser context to parse a new buffer; Clears any prior
13696  * contents from the parser context. The buffer parameter must not be
13697  * NULL, but the filename parameter can be
13698  */
13699 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13700 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13701                              const char* filename)
13702 {
13703     xmlParserInputPtr input;
13704 
13705     if ((ctxt == NULL) || (buffer == NULL))
13706         return;
13707 
13708     input = xmlNewInputStream(ctxt);
13709     if (input == NULL) {
13710         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13711         xmlClearParserCtxt(ctxt);
13712         return;
13713     }
13714 
13715     xmlClearParserCtxt(ctxt);
13716     if (filename != NULL)
13717         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13718     input->base = buffer;
13719     input->cur = buffer;
13720     input->end = &buffer[xmlStrlen(buffer)];
13721     inputPush(ctxt, input);
13722 }
13723 
13724 /**
13725  * xmlSAXUserParseFile:
13726  * @sax:  a SAX handler
13727  * @user_data:  The user data returned on SAX callbacks
13728  * @filename:  a file name
13729  *
13730  * parse an XML file and call the given SAX handler routines.
13731  * Automatic support for ZLIB/Compress compressed document is provided
13732  *
13733  * Returns 0 in case of success or a error number otherwise
13734  */
13735 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13736 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13737                     const char *filename) {
13738     int ret = 0;
13739     xmlParserCtxtPtr ctxt;
13740 
13741     ctxt = xmlCreateFileParserCtxt(filename);
13742     if (ctxt == NULL) return -1;
13743     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13744 	xmlFree(ctxt->sax);
13745     ctxt->sax = sax;
13746     xmlDetectSAX2(ctxt);
13747 
13748     if (user_data != NULL)
13749 	ctxt->userData = user_data;
13750 
13751     xmlParseDocument(ctxt);
13752 
13753     if (ctxt->wellFormed)
13754 	ret = 0;
13755     else {
13756         if (ctxt->errNo != 0)
13757 	    ret = ctxt->errNo;
13758 	else
13759 	    ret = -1;
13760     }
13761     if (sax != NULL)
13762 	ctxt->sax = NULL;
13763     if (ctxt->myDoc != NULL) {
13764         xmlFreeDoc(ctxt->myDoc);
13765 	ctxt->myDoc = NULL;
13766     }
13767     xmlFreeParserCtxt(ctxt);
13768 
13769     return ret;
13770 }
13771 #endif /* LIBXML_SAX1_ENABLED */
13772 
13773 /************************************************************************
13774  *									*
13775  * 		Front ends when parsing from memory			*
13776  *									*
13777  ************************************************************************/
13778 
13779 /**
13780  * xmlCreateMemoryParserCtxt:
13781  * @buffer:  a pointer to a char array
13782  * @size:  the size of the array
13783  *
13784  * Create a parser context for an XML in-memory document.
13785  *
13786  * Returns the new parser context or NULL
13787  */
13788 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13789 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13790     xmlParserCtxtPtr ctxt;
13791     xmlParserInputPtr input;
13792     xmlParserInputBufferPtr buf;
13793 
13794     if (buffer == NULL)
13795 	return(NULL);
13796     if (size <= 0)
13797 	return(NULL);
13798 
13799     ctxt = xmlNewParserCtxt();
13800     if (ctxt == NULL)
13801 	return(NULL);
13802 
13803     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13804     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13805     if (buf == NULL) {
13806 	xmlFreeParserCtxt(ctxt);
13807 	return(NULL);
13808     }
13809 
13810     input = xmlNewInputStream(ctxt);
13811     if (input == NULL) {
13812 	xmlFreeParserInputBuffer(buf);
13813 	xmlFreeParserCtxt(ctxt);
13814 	return(NULL);
13815     }
13816 
13817     input->filename = NULL;
13818     input->buf = buf;
13819     input->base = input->buf->buffer->content;
13820     input->cur = input->buf->buffer->content;
13821     input->end = &input->buf->buffer->content[input->buf->buffer->use];
13822 
13823     inputPush(ctxt, input);
13824     return(ctxt);
13825 }
13826 
13827 #ifdef LIBXML_SAX1_ENABLED
13828 /**
13829  * xmlSAXParseMemoryWithData:
13830  * @sax:  the SAX handler block
13831  * @buffer:  an pointer to a char array
13832  * @size:  the size of the array
13833  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13834  *             documents
13835  * @data:  the userdata
13836  *
13837  * parse an XML in-memory block and use the given SAX function block
13838  * to handle the parsing callback. If sax is NULL, fallback to the default
13839  * DOM tree building routines.
13840  *
13841  * User data (void *) is stored within the parser context in the
13842  * context's _private member, so it is available nearly everywhere in libxml
13843  *
13844  * Returns the resulting document tree
13845  */
13846 
13847 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13848 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13849 	          int size, int recovery, void *data) {
13850     xmlDocPtr ret;
13851     xmlParserCtxtPtr ctxt;
13852 
13853     xmlInitParser();
13854 
13855     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13856     if (ctxt == NULL) return(NULL);
13857     if (sax != NULL) {
13858 	if (ctxt->sax != NULL)
13859 	    xmlFree(ctxt->sax);
13860         ctxt->sax = sax;
13861     }
13862     xmlDetectSAX2(ctxt);
13863     if (data!=NULL) {
13864 	ctxt->_private=data;
13865     }
13866 
13867     ctxt->recovery = recovery;
13868 
13869     xmlParseDocument(ctxt);
13870 
13871     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13872     else {
13873        ret = NULL;
13874        xmlFreeDoc(ctxt->myDoc);
13875        ctxt->myDoc = NULL;
13876     }
13877     if (sax != NULL)
13878 	ctxt->sax = NULL;
13879     xmlFreeParserCtxt(ctxt);
13880 
13881     return(ret);
13882 }
13883 
13884 /**
13885  * xmlSAXParseMemory:
13886  * @sax:  the SAX handler block
13887  * @buffer:  an pointer to a char array
13888  * @size:  the size of the array
13889  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13890  *             documents
13891  *
13892  * parse an XML in-memory block and use the given SAX function block
13893  * to handle the parsing callback. If sax is NULL, fallback to the default
13894  * DOM tree building routines.
13895  *
13896  * Returns the resulting document tree
13897  */
13898 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13899 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13900 	          int size, int recovery) {
13901     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13902 }
13903 
13904 /**
13905  * xmlParseMemory:
13906  * @buffer:  an pointer to a char array
13907  * @size:  the size of the array
13908  *
13909  * parse an XML in-memory block and build a tree.
13910  *
13911  * Returns the resulting document tree
13912  */
13913 
xmlParseMemory(const char * buffer,int size)13914 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13915    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13916 }
13917 
13918 /**
13919  * xmlRecoverMemory:
13920  * @buffer:  an pointer to a char array
13921  * @size:  the size of the array
13922  *
13923  * parse an XML in-memory block and build a tree.
13924  * In the case the document is not Well Formed, an attempt to
13925  * build a tree is tried anyway
13926  *
13927  * Returns the resulting document tree or NULL in case of error
13928  */
13929 
xmlRecoverMemory(const char * buffer,int size)13930 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13931    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13932 }
13933 
13934 /**
13935  * xmlSAXUserParseMemory:
13936  * @sax:  a SAX handler
13937  * @user_data:  The user data returned on SAX callbacks
13938  * @buffer:  an in-memory XML document input
13939  * @size:  the length of the XML document in bytes
13940  *
13941  * A better SAX parsing routine.
13942  * parse an XML in-memory buffer and call the given SAX handler routines.
13943  *
13944  * Returns 0 in case of success or a error number otherwise
13945  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13946 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13947 			  const char *buffer, int size) {
13948     int ret = 0;
13949     xmlParserCtxtPtr ctxt;
13950 
13951     xmlInitParser();
13952 
13953     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13954     if (ctxt == NULL) return -1;
13955     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13956         xmlFree(ctxt->sax);
13957     ctxt->sax = sax;
13958     xmlDetectSAX2(ctxt);
13959 
13960     if (user_data != NULL)
13961 	ctxt->userData = user_data;
13962 
13963     xmlParseDocument(ctxt);
13964 
13965     if (ctxt->wellFormed)
13966 	ret = 0;
13967     else {
13968         if (ctxt->errNo != 0)
13969 	    ret = ctxt->errNo;
13970 	else
13971 	    ret = -1;
13972     }
13973     if (sax != NULL)
13974         ctxt->sax = NULL;
13975     if (ctxt->myDoc != NULL) {
13976         xmlFreeDoc(ctxt->myDoc);
13977 	ctxt->myDoc = NULL;
13978     }
13979     xmlFreeParserCtxt(ctxt);
13980 
13981     return ret;
13982 }
13983 #endif /* LIBXML_SAX1_ENABLED */
13984 
13985 /**
13986  * xmlCreateDocParserCtxt:
13987  * @cur:  a pointer to an array of xmlChar
13988  *
13989  * Creates a parser context for an XML in-memory document.
13990  *
13991  * Returns the new parser context or NULL
13992  */
13993 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)13994 xmlCreateDocParserCtxt(const xmlChar *cur) {
13995     int len;
13996 
13997     if (cur == NULL)
13998 	return(NULL);
13999     len = xmlStrlen(cur);
14000     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14001 }
14002 
14003 #ifdef LIBXML_SAX1_ENABLED
14004 /**
14005  * xmlSAXParseDoc:
14006  * @sax:  the SAX handler block
14007  * @cur:  a pointer to an array of xmlChar
14008  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14009  *             documents
14010  *
14011  * parse an XML in-memory document and build a tree.
14012  * It use the given SAX function block to handle the parsing callback.
14013  * If sax is NULL, fallback to the default DOM tree building routines.
14014  *
14015  * Returns the resulting document tree
14016  */
14017 
14018 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14019 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14020     xmlDocPtr ret;
14021     xmlParserCtxtPtr ctxt;
14022     xmlSAXHandlerPtr oldsax = NULL;
14023 
14024     if (cur == NULL) return(NULL);
14025 
14026 
14027     ctxt = xmlCreateDocParserCtxt(cur);
14028     if (ctxt == NULL) return(NULL);
14029     if (sax != NULL) {
14030         oldsax = ctxt->sax;
14031         ctxt->sax = sax;
14032         ctxt->userData = NULL;
14033     }
14034     xmlDetectSAX2(ctxt);
14035 
14036     xmlParseDocument(ctxt);
14037     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14038     else {
14039        ret = NULL;
14040        xmlFreeDoc(ctxt->myDoc);
14041        ctxt->myDoc = NULL;
14042     }
14043     if (sax != NULL)
14044 	ctxt->sax = oldsax;
14045     xmlFreeParserCtxt(ctxt);
14046 
14047     return(ret);
14048 }
14049 
14050 /**
14051  * xmlParseDoc:
14052  * @cur:  a pointer to an array of xmlChar
14053  *
14054  * parse an XML in-memory document and build a tree.
14055  *
14056  * Returns the resulting document tree
14057  */
14058 
14059 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14060 xmlParseDoc(const xmlChar *cur) {
14061     return(xmlSAXParseDoc(NULL, cur, 0));
14062 }
14063 #endif /* LIBXML_SAX1_ENABLED */
14064 
14065 #ifdef LIBXML_LEGACY_ENABLED
14066 /************************************************************************
14067  *									*
14068  * 	Specific function to keep track of entities references		*
14069  * 	and used by the XSLT debugger					*
14070  *									*
14071  ************************************************************************/
14072 
14073 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14074 
14075 /**
14076  * xmlAddEntityReference:
14077  * @ent : A valid entity
14078  * @firstNode : A valid first node for children of entity
14079  * @lastNode : A valid last node of children entity
14080  *
14081  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14082  */
14083 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14084 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14085                       xmlNodePtr lastNode)
14086 {
14087     if (xmlEntityRefFunc != NULL) {
14088         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14089     }
14090 }
14091 
14092 
14093 /**
14094  * xmlSetEntityReferenceFunc:
14095  * @func: A valid function
14096  *
14097  * Set the function to call call back when a xml reference has been made
14098  */
14099 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14100 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14101 {
14102     xmlEntityRefFunc = func;
14103 }
14104 #endif /* LIBXML_LEGACY_ENABLED */
14105 
14106 /************************************************************************
14107  *									*
14108  * 				Miscellaneous				*
14109  *									*
14110  ************************************************************************/
14111 
14112 #ifdef LIBXML_XPATH_ENABLED
14113 #include <libxml/xpath.h>
14114 #endif
14115 
14116 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14117 static int xmlParserInitialized = 0;
14118 
14119 /**
14120  * xmlInitParser:
14121  *
14122  * Initialization function for the XML parser.
14123  * This is not reentrant. Call once before processing in case of
14124  * use in multithreaded programs.
14125  */
14126 
14127 void
xmlInitParser(void)14128 xmlInitParser(void) {
14129     if (xmlParserInitialized != 0)
14130 	return;
14131 
14132 #ifdef LIBXML_THREAD_ENABLED
14133     __xmlGlobalInitMutexLock();
14134     if (xmlParserInitialized == 0) {
14135 #endif
14136 	xmlInitThreads();
14137 	xmlInitGlobals();
14138 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14139 	    (xmlGenericError == NULL))
14140 	    initGenericErrorDefaultFunc(NULL);
14141 	xmlInitMemory();
14142 	xmlInitCharEncodingHandlers();
14143 	xmlDefaultSAXHandlerInit();
14144 	xmlRegisterDefaultInputCallbacks();
14145 #ifdef LIBXML_OUTPUT_ENABLED
14146 	xmlRegisterDefaultOutputCallbacks();
14147 #endif /* LIBXML_OUTPUT_ENABLED */
14148 #ifdef LIBXML_HTML_ENABLED
14149 	htmlInitAutoClose();
14150 	htmlDefaultSAXHandlerInit();
14151 #endif
14152 #ifdef LIBXML_XPATH_ENABLED
14153 	xmlXPathInit();
14154 #endif
14155 	xmlParserInitialized = 1;
14156 #ifdef LIBXML_THREAD_ENABLED
14157     }
14158     __xmlGlobalInitMutexUnlock();
14159 #endif
14160 }
14161 
14162 /**
14163  * xmlCleanupParser:
14164  *
14165  * This function name is somewhat misleading. It does not clean up
14166  * parser state, it cleans up memory allocated by the library itself.
14167  * It is a cleanup function for the XML library. It tries to reclaim all
14168  * related global memory allocated for the library processing.
14169  * It doesn't deallocate any document related memory. One should
14170  * call xmlCleanupParser() only when the process has finished using
14171  * the library and all XML/HTML documents built with it.
14172  * See also xmlInitParser() which has the opposite function of preparing
14173  * the library for operations.
14174  *
14175  * WARNING: if your application is multithreaded or has plugin support
14176  *          calling this may crash the application if another thread or
14177  *          a plugin is still using libxml2. It's sometimes very hard to
14178  *          guess if libxml2 is in use in the application, some libraries
14179  *          or plugins may use it without notice. In case of doubt abstain
14180  *          from calling this function or do it just before calling exit()
14181  *          to avoid leak reports from valgrind !
14182  */
14183 
14184 void
xmlCleanupParser(void)14185 xmlCleanupParser(void) {
14186     if (!xmlParserInitialized)
14187 	return;
14188 
14189     xmlCleanupCharEncodingHandlers();
14190 #ifdef LIBXML_CATALOG_ENABLED
14191     xmlCatalogCleanup();
14192 #endif
14193     xmlDictCleanup();
14194     xmlCleanupInputCallbacks();
14195 #ifdef LIBXML_OUTPUT_ENABLED
14196     xmlCleanupOutputCallbacks();
14197 #endif
14198 #ifdef LIBXML_SCHEMAS_ENABLED
14199     xmlSchemaCleanupTypes();
14200     xmlRelaxNGCleanupTypes();
14201 #endif
14202     xmlCleanupGlobals();
14203     xmlResetLastError();
14204     xmlCleanupThreads(); /* must be last if called not from the main thread */
14205     xmlCleanupMemory();
14206     xmlParserInitialized = 0;
14207 }
14208 
14209 /************************************************************************
14210  *									*
14211  *	New set (2.6.0) of simpler and more flexible APIs		*
14212  *									*
14213  ************************************************************************/
14214 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL @str is ignored.
 *
 * The expansion is a single statement (do/while(0) without a trailing
 * semicolon), so "DICT_FREE(x);" is safe inside an unbraced if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14226 
14227 /**
14228  * xmlCtxtReset:
14229  * @ctxt: an XML parser context
14230  *
14231  * Reset a parser context
14232  */
14233 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14234 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14235 {
14236     xmlParserInputPtr input;
14237     xmlDictPtr dict;
14238 
14239     if (ctxt == NULL)
14240         return;
14241 
14242     dict = ctxt->dict;
14243 
14244     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14245         xmlFreeInputStream(input);
14246     }
14247     ctxt->inputNr = 0;
14248     ctxt->input = NULL;
14249 
14250     ctxt->spaceNr = 0;
14251     if (ctxt->spaceTab != NULL) {
14252 	ctxt->spaceTab[0] = -1;
14253 	ctxt->space = &ctxt->spaceTab[0];
14254     } else {
14255         ctxt->space = NULL;
14256     }
14257 
14258 
14259     ctxt->nodeNr = 0;
14260     ctxt->node = NULL;
14261 
14262     ctxt->nameNr = 0;
14263     ctxt->name = NULL;
14264 
14265     DICT_FREE(ctxt->version);
14266     ctxt->version = NULL;
14267     DICT_FREE(ctxt->encoding);
14268     ctxt->encoding = NULL;
14269     DICT_FREE(ctxt->directory);
14270     ctxt->directory = NULL;
14271     DICT_FREE(ctxt->extSubURI);
14272     ctxt->extSubURI = NULL;
14273     DICT_FREE(ctxt->extSubSystem);
14274     ctxt->extSubSystem = NULL;
14275     if (ctxt->myDoc != NULL)
14276         xmlFreeDoc(ctxt->myDoc);
14277     ctxt->myDoc = NULL;
14278 
14279     ctxt->standalone = -1;
14280     ctxt->hasExternalSubset = 0;
14281     ctxt->hasPErefs = 0;
14282     ctxt->html = 0;
14283     ctxt->external = 0;
14284     ctxt->instate = XML_PARSER_START;
14285     ctxt->token = 0;
14286 
14287     ctxt->wellFormed = 1;
14288     ctxt->nsWellFormed = 1;
14289     ctxt->disableSAX = 0;
14290     ctxt->valid = 1;
14291 #if 0
14292     ctxt->vctxt.userData = ctxt;
14293     ctxt->vctxt.error = xmlParserValidityError;
14294     ctxt->vctxt.warning = xmlParserValidityWarning;
14295 #endif
14296     ctxt->record_info = 0;
14297     ctxt->nbChars = 0;
14298     ctxt->checkIndex = 0;
14299     ctxt->inSubset = 0;
14300     ctxt->errNo = XML_ERR_OK;
14301     ctxt->depth = 0;
14302     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14303     ctxt->catalogs = NULL;
14304     ctxt->nbentities = 0;
14305     ctxt->sizeentities = 0;
14306     xmlInitNodeInfoSeq(&ctxt->node_seq);
14307 
14308     if (ctxt->attsDefault != NULL) {
14309         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14310         ctxt->attsDefault = NULL;
14311     }
14312     if (ctxt->attsSpecial != NULL) {
14313         xmlHashFree(ctxt->attsSpecial, NULL);
14314         ctxt->attsSpecial = NULL;
14315     }
14316 
14317 #ifdef LIBXML_CATALOG_ENABLED
14318     if (ctxt->catalogs != NULL)
14319 	xmlCatalogFreeLocal(ctxt->catalogs);
14320 #endif
14321     if (ctxt->lastError.code != XML_ERR_OK)
14322         xmlResetError(&ctxt->lastError);
14323 }
14324 
14325 /**
14326  * xmlCtxtResetPush:
14327  * @ctxt: an XML parser context
14328  * @chunk:  a pointer to an array of chars
14329  * @size:  number of chars in the array
14330  * @filename:  an optional file name or URI
14331  * @encoding:  the document encoding, or NULL
14332  *
14333  * Reset a push parser context
14334  *
14335  * Returns 0 in case of success and 1 in case of error
14336  */
14337 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14338 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14339                  int size, const char *filename, const char *encoding)
14340 {
14341     xmlParserInputPtr inputStream;
14342     xmlParserInputBufferPtr buf;
14343     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14344 
14345     if (ctxt == NULL)
14346         return(1);
14347 
14348     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14349         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14350 
14351     buf = xmlAllocParserInputBuffer(enc);
14352     if (buf == NULL)
14353         return(1);
14354 
14355     if (ctxt == NULL) {
14356         xmlFreeParserInputBuffer(buf);
14357         return(1);
14358     }
14359 
14360     xmlCtxtReset(ctxt);
14361 
14362     if (ctxt->pushTab == NULL) {
14363         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14364 	                                    sizeof(xmlChar *));
14365         if (ctxt->pushTab == NULL) {
14366 	    xmlErrMemory(ctxt, NULL);
14367             xmlFreeParserInputBuffer(buf);
14368             return(1);
14369         }
14370     }
14371 
14372     if (filename == NULL) {
14373         ctxt->directory = NULL;
14374     } else {
14375         ctxt->directory = xmlParserGetDirectory(filename);
14376     }
14377 
14378     inputStream = xmlNewInputStream(ctxt);
14379     if (inputStream == NULL) {
14380         xmlFreeParserInputBuffer(buf);
14381         return(1);
14382     }
14383 
14384     if (filename == NULL)
14385         inputStream->filename = NULL;
14386     else
14387         inputStream->filename = (char *)
14388             xmlCanonicPath((const xmlChar *) filename);
14389     inputStream->buf = buf;
14390     inputStream->base = inputStream->buf->buffer->content;
14391     inputStream->cur = inputStream->buf->buffer->content;
14392     inputStream->end =
14393         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14394 
14395     inputPush(ctxt, inputStream);
14396 
14397     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14398         (ctxt->input->buf != NULL)) {
14399         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14400         int cur = ctxt->input->cur - ctxt->input->base;
14401 
14402         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14403 
14404         ctxt->input->base = ctxt->input->buf->buffer->content + base;
14405         ctxt->input->cur = ctxt->input->base + cur;
14406         ctxt->input->end =
14407             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14408                                                use];
14409 #ifdef DEBUG_PUSH
14410         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14411 #endif
14412     }
14413 
14414     if (encoding != NULL) {
14415         xmlCharEncodingHandlerPtr hdlr;
14416 
14417         if (ctxt->encoding != NULL)
14418 	    xmlFree((xmlChar *) ctxt->encoding);
14419         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14420 
14421         hdlr = xmlFindCharEncodingHandler(encoding);
14422         if (hdlr != NULL) {
14423             xmlSwitchToEncoding(ctxt, hdlr);
14424 	} else {
14425 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14426 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14427         }
14428     } else if (enc != XML_CHAR_ENCODING_NONE) {
14429         xmlSwitchEncoding(ctxt, enc);
14430     }
14431 
14432     return(0);
14433 }
14434 
14435 
14436 /**
14437  * xmlCtxtUseOptionsInternal:
14438  * @ctxt: an XML parser context
14439  * @options:  a combination of xmlParserOption
14440  * @encoding:  the user provided encoding to use
14441  *
14442  * Applies the options to the parser context
14443  *
14444  * Returns 0 in case of success, the set of unknown or unimplemented options
14445  *         in case of error.
14446  */
14447 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14448 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14449 {
14450     if (ctxt == NULL)
14451         return(-1);
14452     if (encoding != NULL) {
14453         if (ctxt->encoding != NULL)
14454 	    xmlFree((xmlChar *) ctxt->encoding);
14455         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14456     }
14457     if (options & XML_PARSE_RECOVER) {
14458         ctxt->recovery = 1;
14459         options -= XML_PARSE_RECOVER;
14460 	ctxt->options |= XML_PARSE_RECOVER;
14461     } else
14462         ctxt->recovery = 0;
14463     if (options & XML_PARSE_DTDLOAD) {
14464         ctxt->loadsubset = XML_DETECT_IDS;
14465         options -= XML_PARSE_DTDLOAD;
14466 	ctxt->options |= XML_PARSE_DTDLOAD;
14467     } else
14468         ctxt->loadsubset = 0;
14469     if (options & XML_PARSE_DTDATTR) {
14470         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14471         options -= XML_PARSE_DTDATTR;
14472 	ctxt->options |= XML_PARSE_DTDATTR;
14473     }
14474     if (options & XML_PARSE_NOENT) {
14475         ctxt->replaceEntities = 1;
14476         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14477         options -= XML_PARSE_NOENT;
14478 	ctxt->options |= XML_PARSE_NOENT;
14479     } else
14480         ctxt->replaceEntities = 0;
14481     if (options & XML_PARSE_PEDANTIC) {
14482         ctxt->pedantic = 1;
14483         options -= XML_PARSE_PEDANTIC;
14484 	ctxt->options |= XML_PARSE_PEDANTIC;
14485     } else
14486         ctxt->pedantic = 0;
14487     if (options & XML_PARSE_NOBLANKS) {
14488         ctxt->keepBlanks = 0;
14489         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14490         options -= XML_PARSE_NOBLANKS;
14491 	ctxt->options |= XML_PARSE_NOBLANKS;
14492     } else
14493         ctxt->keepBlanks = 1;
14494     if (options & XML_PARSE_DTDVALID) {
14495         ctxt->validate = 1;
14496         if (options & XML_PARSE_NOWARNING)
14497             ctxt->vctxt.warning = NULL;
14498         if (options & XML_PARSE_NOERROR)
14499             ctxt->vctxt.error = NULL;
14500         options -= XML_PARSE_DTDVALID;
14501 	ctxt->options |= XML_PARSE_DTDVALID;
14502     } else
14503         ctxt->validate = 0;
14504     if (options & XML_PARSE_NOWARNING) {
14505         ctxt->sax->warning = NULL;
14506         options -= XML_PARSE_NOWARNING;
14507     }
14508     if (options & XML_PARSE_NOERROR) {
14509         ctxt->sax->error = NULL;
14510         ctxt->sax->fatalError = NULL;
14511         options -= XML_PARSE_NOERROR;
14512     }
14513 #ifdef LIBXML_SAX1_ENABLED
14514     if (options & XML_PARSE_SAX1) {
14515         ctxt->sax->startElement = xmlSAX2StartElement;
14516         ctxt->sax->endElement = xmlSAX2EndElement;
14517         ctxt->sax->startElementNs = NULL;
14518         ctxt->sax->endElementNs = NULL;
14519         ctxt->sax->initialized = 1;
14520         options -= XML_PARSE_SAX1;
14521 	ctxt->options |= XML_PARSE_SAX1;
14522     }
14523 #endif /* LIBXML_SAX1_ENABLED */
14524     if (options & XML_PARSE_NODICT) {
14525         ctxt->dictNames = 0;
14526         options -= XML_PARSE_NODICT;
14527 	ctxt->options |= XML_PARSE_NODICT;
14528     } else {
14529         ctxt->dictNames = 1;
14530     }
14531     if (options & XML_PARSE_NOCDATA) {
14532         ctxt->sax->cdataBlock = NULL;
14533         options -= XML_PARSE_NOCDATA;
14534 	ctxt->options |= XML_PARSE_NOCDATA;
14535     }
14536     if (options & XML_PARSE_NSCLEAN) {
14537 	ctxt->options |= XML_PARSE_NSCLEAN;
14538         options -= XML_PARSE_NSCLEAN;
14539     }
14540     if (options & XML_PARSE_NONET) {
14541 	ctxt->options |= XML_PARSE_NONET;
14542         options -= XML_PARSE_NONET;
14543     }
14544     if (options & XML_PARSE_COMPACT) {
14545 	ctxt->options |= XML_PARSE_COMPACT;
14546         options -= XML_PARSE_COMPACT;
14547     }
14548     if (options & XML_PARSE_OLD10) {
14549 	ctxt->options |= XML_PARSE_OLD10;
14550         options -= XML_PARSE_OLD10;
14551     }
14552     if (options & XML_PARSE_NOBASEFIX) {
14553 	ctxt->options |= XML_PARSE_NOBASEFIX;
14554         options -= XML_PARSE_NOBASEFIX;
14555     }
14556     if (options & XML_PARSE_HUGE) {
14557 	ctxt->options |= XML_PARSE_HUGE;
14558         options -= XML_PARSE_HUGE;
14559     }
14560     if (options & XML_PARSE_OLDSAX) {
14561 	ctxt->options |= XML_PARSE_OLDSAX;
14562         options -= XML_PARSE_OLDSAX;
14563     }
14564     ctxt->linenumbers = 1;
14565     return (options);
14566 }
14567 
14568 /**
14569  * xmlCtxtUseOptions:
14570  * @ctxt: an XML parser context
14571  * @options:  a combination of xmlParserOption
14572  *
14573  * Applies the options to the parser context
14574  *
14575  * Returns 0 in case of success, the set of unknown or unimplemented options
14576  *         in case of error.
14577  */
14578 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14579 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14580 {
14581    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14582 }
14583 
14584 /**
14585  * xmlDoRead:
14586  * @ctxt:  an XML parser context
14587  * @URL:  the base URL to use for the document
14588  * @encoding:  the document encoding, or NULL
14589  * @options:  a combination of xmlParserOption
14590  * @reuse:  keep the context for reuse
14591  *
14592  * Common front-end for the xmlRead functions
14593  *
14594  * Returns the resulting document tree or NULL
14595  */
14596 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14597 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14598           int options, int reuse)
14599 {
14600     xmlDocPtr ret;
14601 
14602     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14603     if (encoding != NULL) {
14604         xmlCharEncodingHandlerPtr hdlr;
14605 
14606 	hdlr = xmlFindCharEncodingHandler(encoding);
14607 	if (hdlr != NULL)
14608 	    xmlSwitchToEncoding(ctxt, hdlr);
14609     }
14610     if ((URL != NULL) && (ctxt->input != NULL) &&
14611         (ctxt->input->filename == NULL))
14612         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14613     xmlParseDocument(ctxt);
14614     if ((ctxt->wellFormed) || ctxt->recovery)
14615         ret = ctxt->myDoc;
14616     else {
14617         ret = NULL;
14618 	if (ctxt->myDoc != NULL) {
14619 	    xmlFreeDoc(ctxt->myDoc);
14620 	}
14621     }
14622     ctxt->myDoc = NULL;
14623     if (!reuse) {
14624 	xmlFreeParserCtxt(ctxt);
14625     }
14626 
14627     return (ret);
14628 }
14629 
14630 /**
14631  * xmlReadDoc:
14632  * @cur:  a pointer to a zero terminated string
14633  * @URL:  the base URL to use for the document
14634  * @encoding:  the document encoding, or NULL
14635  * @options:  a combination of xmlParserOption
14636  *
14637  * parse an XML in-memory document and build a tree.
14638  *
14639  * Returns the resulting document tree
14640  */
14641 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14642 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643 {
14644     xmlParserCtxtPtr ctxt;
14645 
14646     if (cur == NULL)
14647         return (NULL);
14648 
14649     ctxt = xmlCreateDocParserCtxt(cur);
14650     if (ctxt == NULL)
14651         return (NULL);
14652     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14653 }
14654 
14655 /**
14656  * xmlReadFile:
14657  * @filename:  a file or URL
14658  * @encoding:  the document encoding, or NULL
14659  * @options:  a combination of xmlParserOption
14660  *
14661  * parse an XML file from the filesystem or the network.
14662  *
14663  * Returns the resulting document tree
14664  */
14665 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14666 xmlReadFile(const char *filename, const char *encoding, int options)
14667 {
14668     xmlParserCtxtPtr ctxt;
14669 
14670     ctxt = xmlCreateURLParserCtxt(filename, options);
14671     if (ctxt == NULL)
14672         return (NULL);
14673     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14674 }
14675 
14676 /**
14677  * xmlReadMemory:
14678  * @buffer:  a pointer to a char array
14679  * @size:  the size of the array
14680  * @URL:  the base URL to use for the document
14681  * @encoding:  the document encoding, or NULL
14682  * @options:  a combination of xmlParserOption
14683  *
14684  * parse an XML in-memory document and build a tree.
14685  *
14686  * Returns the resulting document tree
14687  */
14688 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14689 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14690 {
14691     xmlParserCtxtPtr ctxt;
14692 
14693     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14694     if (ctxt == NULL)
14695         return (NULL);
14696     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14697 }
14698 
14699 /**
14700  * xmlReadFd:
14701  * @fd:  an open file descriptor
14702  * @URL:  the base URL to use for the document
14703  * @encoding:  the document encoding, or NULL
14704  * @options:  a combination of xmlParserOption
14705  *
14706  * parse an XML from a file descriptor and build a tree.
14707  * NOTE that the file descriptor will not be closed when the
14708  *      reader is closed or reset.
14709  *
14710  * Returns the resulting document tree
14711  */
14712 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14713 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14714 {
14715     xmlParserCtxtPtr ctxt;
14716     xmlParserInputBufferPtr input;
14717     xmlParserInputPtr stream;
14718 
14719     if (fd < 0)
14720         return (NULL);
14721 
14722     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14723     if (input == NULL)
14724         return (NULL);
14725     input->closecallback = NULL;
14726     ctxt = xmlNewParserCtxt();
14727     if (ctxt == NULL) {
14728         xmlFreeParserInputBuffer(input);
14729         return (NULL);
14730     }
14731     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14732     if (stream == NULL) {
14733         xmlFreeParserInputBuffer(input);
14734 	xmlFreeParserCtxt(ctxt);
14735         return (NULL);
14736     }
14737     inputPush(ctxt, stream);
14738     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14739 }
14740 
14741 /**
14742  * xmlReadIO:
14743  * @ioread:  an I/O read function
14744  * @ioclose:  an I/O close function
14745  * @ioctx:  an I/O handler
14746  * @URL:  the base URL to use for the document
14747  * @encoding:  the document encoding, or NULL
14748  * @options:  a combination of xmlParserOption
14749  *
14750  * parse an XML document from I/O functions and source and build a tree.
14751  *
14752  * Returns the resulting document tree
14753  */
14754 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14755 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14756           void *ioctx, const char *URL, const char *encoding, int options)
14757 {
14758     xmlParserCtxtPtr ctxt;
14759     xmlParserInputBufferPtr input;
14760     xmlParserInputPtr stream;
14761 
14762     if (ioread == NULL)
14763         return (NULL);
14764 
14765     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14766                                          XML_CHAR_ENCODING_NONE);
14767     if (input == NULL)
14768         return (NULL);
14769     ctxt = xmlNewParserCtxt();
14770     if (ctxt == NULL) {
14771         xmlFreeParserInputBuffer(input);
14772         return (NULL);
14773     }
14774     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14775     if (stream == NULL) {
14776         xmlFreeParserInputBuffer(input);
14777 	xmlFreeParserCtxt(ctxt);
14778         return (NULL);
14779     }
14780     inputPush(ctxt, stream);
14781     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14782 }
14783 
14784 /**
14785  * xmlCtxtReadDoc:
14786  * @ctxt:  an XML parser context
14787  * @cur:  a pointer to a zero terminated string
14788  * @URL:  the base URL to use for the document
14789  * @encoding:  the document encoding, or NULL
14790  * @options:  a combination of xmlParserOption
14791  *
14792  * parse an XML in-memory document and build a tree.
14793  * This reuses the existing @ctxt parser context
14794  *
14795  * Returns the resulting document tree
14796  */
14797 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14798 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14799                const char *URL, const char *encoding, int options)
14800 {
14801     xmlParserInputPtr stream;
14802 
14803     if (cur == NULL)
14804         return (NULL);
14805     if (ctxt == NULL)
14806         return (NULL);
14807 
14808     xmlCtxtReset(ctxt);
14809 
14810     stream = xmlNewStringInputStream(ctxt, cur);
14811     if (stream == NULL) {
14812         return (NULL);
14813     }
14814     inputPush(ctxt, stream);
14815     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14816 }
14817 
14818 /**
14819  * xmlCtxtReadFile:
14820  * @ctxt:  an XML parser context
14821  * @filename:  a file or URL
14822  * @encoding:  the document encoding, or NULL
14823  * @options:  a combination of xmlParserOption
14824  *
14825  * parse an XML file from the filesystem or the network.
14826  * This reuses the existing @ctxt parser context
14827  *
14828  * Returns the resulting document tree
14829  */
14830 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14831 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14832                 const char *encoding, int options)
14833 {
14834     xmlParserInputPtr stream;
14835 
14836     if (filename == NULL)
14837         return (NULL);
14838     if (ctxt == NULL)
14839         return (NULL);
14840 
14841     xmlCtxtReset(ctxt);
14842 
14843     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14844     if (stream == NULL) {
14845         return (NULL);
14846     }
14847     inputPush(ctxt, stream);
14848     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14849 }
14850 
14851 /**
14852  * xmlCtxtReadMemory:
14853  * @ctxt:  an XML parser context
14854  * @buffer:  a pointer to a char array
14855  * @size:  the size of the array
14856  * @URL:  the base URL to use for the document
14857  * @encoding:  the document encoding, or NULL
14858  * @options:  a combination of xmlParserOption
14859  *
14860  * parse an XML in-memory document and build a tree.
14861  * This reuses the existing @ctxt parser context
14862  *
14863  * Returns the resulting document tree
14864  */
14865 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14866 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14867                   const char *URL, const char *encoding, int options)
14868 {
14869     xmlParserInputBufferPtr input;
14870     xmlParserInputPtr stream;
14871 
14872     if (ctxt == NULL)
14873         return (NULL);
14874     if (buffer == NULL)
14875         return (NULL);
14876 
14877     xmlCtxtReset(ctxt);
14878 
14879     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14880     if (input == NULL) {
14881 	return(NULL);
14882     }
14883 
14884     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14885     if (stream == NULL) {
14886 	xmlFreeParserInputBuffer(input);
14887 	return(NULL);
14888     }
14889 
14890     inputPush(ctxt, stream);
14891     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14892 }
14893 
14894 /**
14895  * xmlCtxtReadFd:
14896  * @ctxt:  an XML parser context
14897  * @fd:  an open file descriptor
14898  * @URL:  the base URL to use for the document
14899  * @encoding:  the document encoding, or NULL
14900  * @options:  a combination of xmlParserOption
14901  *
14902  * parse an XML from a file descriptor and build a tree.
14903  * This reuses the existing @ctxt parser context
14904  * NOTE that the file descriptor will not be closed when the
14905  *      reader is closed or reset.
14906  *
14907  * Returns the resulting document tree
14908  */
14909 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14910 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14911               const char *URL, const char *encoding, int options)
14912 {
14913     xmlParserInputBufferPtr input;
14914     xmlParserInputPtr stream;
14915 
14916     if (fd < 0)
14917         return (NULL);
14918     if (ctxt == NULL)
14919         return (NULL);
14920 
14921     xmlCtxtReset(ctxt);
14922 
14923 
14924     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14925     if (input == NULL)
14926         return (NULL);
14927     input->closecallback = NULL;
14928     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929     if (stream == NULL) {
14930         xmlFreeParserInputBuffer(input);
14931         return (NULL);
14932     }
14933     inputPush(ctxt, stream);
14934     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14935 }
14936 
14937 /**
14938  * xmlCtxtReadIO:
14939  * @ctxt:  an XML parser context
14940  * @ioread:  an I/O read function
14941  * @ioclose:  an I/O close function
14942  * @ioctx:  an I/O handler
14943  * @URL:  the base URL to use for the document
14944  * @encoding:  the document encoding, or NULL
14945  * @options:  a combination of xmlParserOption
14946  *
14947  * parse an XML document from I/O functions and source and build a tree.
14948  * This reuses the existing @ctxt parser context
14949  *
14950  * Returns the resulting document tree
14951  */
14952 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14953 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14954               xmlInputCloseCallback ioclose, void *ioctx,
14955 	      const char *URL,
14956               const char *encoding, int options)
14957 {
14958     xmlParserInputBufferPtr input;
14959     xmlParserInputPtr stream;
14960 
14961     if (ioread == NULL)
14962         return (NULL);
14963     if (ctxt == NULL)
14964         return (NULL);
14965 
14966     xmlCtxtReset(ctxt);
14967 
14968     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14969                                          XML_CHAR_ENCODING_NONE);
14970     if (input == NULL)
14971         return (NULL);
14972     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14973     if (stream == NULL) {
14974         xmlFreeParserInputBuffer(input);
14975         return (NULL);
14976     }
14977     inputPush(ctxt, stream);
14978     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14979 }
14980 
14981 #define bottom_parser
14982 #include "elfgcchack.h"
14983