1 /* libxml2 - Library for parsing XML documents
2 * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 *
4 * This file is not part of the GNU gettext program, but is used with
5 * GNU gettext.
6 *
7 * The original copyright notice is as follows:
8 */
9
10 /*
11 * Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved.
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this software and associated documentation files (the "Software"), to deal
15 * in the Software without restriction, including without limitation the rights
16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 * copies of the Software, and to permit persons to whom the Software is fur-
18 * nished to do so, subject to the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25 * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 * THE SOFTWARE.
30 *
31 * daniel@veillard.com
32 */
33
34 /*
35 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
36 * implemented on top of the SAX interfaces
37 *
38 * References:
39 * The XML specification:
40 * http://www.w3.org/TR/REC-xml
41 * Original 1.0 version:
42 * http://www.w3.org/TR/1998/REC-xml-19980210
43 * XML second edition working draft
44 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
45 *
46 * Okay this is a big file, the parser core is around 7000 lines, then it
47 * is followed by the progressive parser top routines, then the various
48 * high level APIs to call the parser and a few miscellaneous functions.
49 * A number of helper functions and deprecated ones have been moved to
50 * parserInternals.c to reduce this file size.
51 * As much as possible the functions are associated with their relative
52 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
54 * parserInternals.h or parserInternals.c
55 * The DOM tree build is realized from the default SAX callbacks in
56 * the module SAX.c.
57 * The routines doing the validation checks are in valid.c and called either
58 * from the SAX callbacks or as standalone functions using a preparsed
59 * document.
60 */
61
62 /* To avoid EBCDIC trouble when parsing on zOS */
63 #if defined(__MVS__)
64 #pragma convert("ISO8859-1")
65 #endif
66
67 #define IN_LIBXML
68 #include "libxml.h"
69
70 #if defined(_WIN32) && !defined (__CYGWIN__)
71 #define XML_DIR_SEP '\\'
72 #else
73 #define XML_DIR_SEP '/'
74 #endif
75
76 #include <stdlib.h>
77 #include <limits.h>
78 #include <string.h>
79 #include <stdarg.h>
80 #include <stddef.h>
81 #include <libxml/xmlmemory.h>
82 #include <libxml/threads.h>
83 #include <libxml/globals.h>
84 #include <libxml/tree.h>
85 #include <libxml/parser.h>
86 #include <libxml/parserInternals.h>
87 #include <libxml/valid.h>
88 #include <libxml/entities.h>
89 #include <libxml/xmlerror.h>
90 #include <libxml/encoding.h>
91 #include <libxml/xmlIO.h>
92 #include <libxml/uri.h>
93 #ifdef LIBXML_CATALOG_ENABLED
94 #include <libxml/catalog.h>
95 #endif
96 #ifdef LIBXML_SCHEMAS_ENABLED
97 #include <libxml/xmlschemastypes.h>
98 #include <libxml/relaxng.h>
99 #endif
100 #ifdef HAVE_CTYPE_H
101 #include <ctype.h>
102 #endif
103 #ifdef HAVE_STDLIB_H
104 #include <stdlib.h>
105 #endif
106 #ifdef HAVE_SYS_STAT_H
107 #include <sys/stat.h>
108 #endif
109 #ifdef HAVE_FCNTL_H
110 #include <fcntl.h>
111 #endif
112 #ifdef HAVE_UNISTD_H
113 #include <unistd.h>
114 #endif
115
116 #include "buf.h"
117 #include "enc.h"
118
/*
 * Forward declarations of file-local (static) helpers so they can be
 * referenced before their definitions.
 * NOTE(review): only xmlFatalErr() is defined in the part of the file
 * reviewed here; the other two are presumably defined further down.
 */

/* Raise a fatal parser error for @error; @info is optional and may be NULL. */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);
127
128 /************************************************************************
129 * *
130 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
131 * *
132 ************************************************************************/
133
/*
 * XML_PARSER_BIG_ENTITY is the size below which xmlParserEntityCheck()
 * accepts an entity without looking at the amount of input consumed.
 * NOTE(review): XML_PARSER_LOT_ENTITY has no user in the visible part of
 * this file -- confirm it is still needed.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
144
145 /*
146 * xmlParserEntityCheck
147 *
148 * Function to check non-linear entity expansion behaviour
149 * This is here to detect and stop exponential linear entity expansion
150 * This is not a limitation of the parser but a safety
151 * boundary feature. It can be disabled with the XML_PARSE_HUGE
152 * parser option.
153 */
154 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)155 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
156 xmlEntityPtr ent, size_t replacement)
157 {
158 size_t consumed = 0;
159
160 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
161 return (0);
162 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
163 return (1);
164
165 /*
166 * This may look absurd but is needed to detect
167 * entities problems
168 */
169 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
170 (ent->content != NULL) && (ent->checked == 0) &&
171 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
172 unsigned long oldnbent = ctxt->nbentities;
173 xmlChar *rep;
174
175 ent->checked = 1;
176
177 ++ctxt->depth;
178 rep = xmlStringDecodeEntities(ctxt, ent->content,
179 XML_SUBSTITUTE_REF, 0, 0, 0);
180 --ctxt->depth;
181 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
182 ent->content[0] = 0;
183 }
184
185 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
186 if (rep != NULL) {
187 if (xmlStrchr(rep, '<'))
188 ent->checked |= 1;
189 xmlFree(rep);
190 rep = NULL;
191 }
192 }
193 if (replacement != 0) {
194 if (replacement < XML_MAX_TEXT_LENGTH)
195 return(0);
196
197 /*
198 * If the volume of entity copy reaches 10 times the
199 * amount of parsed data and over the large text threshold
200 * then that's very likely to be an abuse.
201 */
202 if (ctxt->input != NULL) {
203 consumed = ctxt->input->consumed +
204 (ctxt->input->cur - ctxt->input->base);
205 }
206 consumed += ctxt->sizeentities;
207
208 if (replacement < XML_PARSER_NON_LINEAR * consumed)
209 return(0);
210 } else if (size != 0) {
211 /*
212 * Do the check based on the replacement size of the entity
213 */
214 if (size < XML_PARSER_BIG_ENTITY)
215 return(0);
216
217 /*
218 * A limit on the amount of text data reasonably used
219 */
220 if (ctxt->input != NULL) {
221 consumed = ctxt->input->consumed +
222 (ctxt->input->cur - ctxt->input->base);
223 }
224 consumed += ctxt->sizeentities;
225
226 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
227 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
228 return (0);
229 } else if (ent != NULL) {
230 /*
231 * use the number of parsed entities in the replacement
232 */
233 size = ent->checked / 2;
234
235 /*
236 * The amount of data parsed counting entities size only once
237 */
238 if (ctxt->input != NULL) {
239 consumed = ctxt->input->consumed +
240 (ctxt->input->cur - ctxt->input->base);
241 }
242 consumed += ctxt->sizeentities;
243
244 /*
245 * Check the density of entities for the amount of data
246 * knowing an entity reference will take at least 3 bytes
247 */
248 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
249 return (0);
250 } else {
251 /*
252 * strange we got no data for checking
253 */
254 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
255 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
256 (ctxt->nbentities <= 10000))
257 return (0);
258 }
259 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
260 return (1);
261 }
262
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option. The default value is 256.
 */
unsigned int xmlParserMaxDepth = 256;
272
273
274
/*
 * NOTE(review): the users of the four macros below are not in the visible
 * part of this file; the two buffer sizes are presumably initial sizes for
 * temporary parsing buffers -- confirm against their call sites.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
290
/*
 * NULL-terminated list of the "xml"-prefixed PI targets allowed by the
 * W3C specs. The table is never modified, so both the strings and the
 * array of pointers are declared const.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
300
301
/*
 * Forward declarations of file-local (static) helpers.
 * NOTE(review): none of their definitions are in the visible part of this
 * file; being static, they are expected further down in this same file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
327
328 /************************************************************************
329 * *
330 * Some factorized error routines *
331 * *
332 ************************************************************************/
333
334 /**
335 * xmlErrAttributeDup:
336 * @ctxt: an XML parser context
337 * @prefix: the attribute prefix
338 * @localname: the attribute localname
339 *
340 * Handle a redefinition of attribute error
341 */
342 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)343 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
344 const xmlChar * localname)
345 {
346 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
347 (ctxt->instate == XML_PARSER_EOF))
348 return;
349 if (ctxt != NULL)
350 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
351
352 if (prefix == NULL)
353 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
354 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
355 (const char *) localname, NULL, NULL, 0, 0,
356 "Attribute %s redefined\n", localname);
357 else
358 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
359 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
360 (const char *) prefix, (const char *) localname,
361 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
362 localname);
363 if (ctxt != NULL) {
364 ctxt->wellFormed = 0;
365 if (ctxt->recovery == 0)
366 ctxt->disableSAX = 1;
367 }
368 }
369
370 /**
371 * xmlFatalErr:
372 * @ctxt: an XML parser context
373 * @error: the error number
374 * @extra: extra information string
375 *
376 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
377 */
378 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)379 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
380 {
381 const char *errmsg;
382
383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
386 switch (error) {
387 case XML_ERR_INVALID_HEX_CHARREF:
388 errmsg = "CharRef: invalid hexadecimal value";
389 break;
390 case XML_ERR_INVALID_DEC_CHARREF:
391 errmsg = "CharRef: invalid decimal value";
392 break;
393 case XML_ERR_INVALID_CHARREF:
394 errmsg = "CharRef: invalid value";
395 break;
396 case XML_ERR_INTERNAL_ERROR:
397 errmsg = "internal error";
398 break;
399 case XML_ERR_PEREF_AT_EOF:
400 errmsg = "PEReference at end of document";
401 break;
402 case XML_ERR_PEREF_IN_PROLOG:
403 errmsg = "PEReference in prolog";
404 break;
405 case XML_ERR_PEREF_IN_EPILOG:
406 errmsg = "PEReference in epilog";
407 break;
408 case XML_ERR_PEREF_NO_NAME:
409 errmsg = "PEReference: no name";
410 break;
411 case XML_ERR_PEREF_SEMICOL_MISSING:
412 errmsg = "PEReference: expecting ';'";
413 break;
414 case XML_ERR_ENTITY_LOOP:
415 errmsg = "Detected an entity reference loop";
416 break;
417 case XML_ERR_ENTITY_NOT_STARTED:
418 errmsg = "EntityValue: \" or ' expected";
419 break;
420 case XML_ERR_ENTITY_PE_INTERNAL:
421 errmsg = "PEReferences forbidden in internal subset";
422 break;
423 case XML_ERR_ENTITY_NOT_FINISHED:
424 errmsg = "EntityValue: \" or ' expected";
425 break;
426 case XML_ERR_ATTRIBUTE_NOT_STARTED:
427 errmsg = "AttValue: \" or ' expected";
428 break;
429 case XML_ERR_LT_IN_ATTRIBUTE:
430 errmsg = "Unescaped '<' not allowed in attributes values";
431 break;
432 case XML_ERR_LITERAL_NOT_STARTED:
433 errmsg = "SystemLiteral \" or ' expected";
434 break;
435 case XML_ERR_LITERAL_NOT_FINISHED:
436 errmsg = "Unfinished System or Public ID \" or ' expected";
437 break;
438 case XML_ERR_MISPLACED_CDATA_END:
439 errmsg = "Sequence ']]>' not allowed in content";
440 break;
441 case XML_ERR_URI_REQUIRED:
442 errmsg = "SYSTEM or PUBLIC, the URI is missing";
443 break;
444 case XML_ERR_PUBID_REQUIRED:
445 errmsg = "PUBLIC, the Public Identifier is missing";
446 break;
447 case XML_ERR_HYPHEN_IN_COMMENT:
448 errmsg = "Comment must not contain '--' (double-hyphen)";
449 break;
450 case XML_ERR_PI_NOT_STARTED:
451 errmsg = "xmlParsePI : no target name";
452 break;
453 case XML_ERR_RESERVED_XML_NAME:
454 errmsg = "Invalid PI name";
455 break;
456 case XML_ERR_NOTATION_NOT_STARTED:
457 errmsg = "NOTATION: Name expected here";
458 break;
459 case XML_ERR_NOTATION_NOT_FINISHED:
460 errmsg = "'>' required to close NOTATION declaration";
461 break;
462 case XML_ERR_VALUE_REQUIRED:
463 errmsg = "Entity value required";
464 break;
465 case XML_ERR_URI_FRAGMENT:
466 errmsg = "Fragment not allowed";
467 break;
468 case XML_ERR_ATTLIST_NOT_STARTED:
469 errmsg = "'(' required to start ATTLIST enumeration";
470 break;
471 case XML_ERR_NMTOKEN_REQUIRED:
472 errmsg = "NmToken expected in ATTLIST enumeration";
473 break;
474 case XML_ERR_ATTLIST_NOT_FINISHED:
475 errmsg = "')' required to finish ATTLIST enumeration";
476 break;
477 case XML_ERR_MIXED_NOT_STARTED:
478 errmsg = "MixedContentDecl : '|' or ')*' expected";
479 break;
480 case XML_ERR_PCDATA_REQUIRED:
481 errmsg = "MixedContentDecl : '#PCDATA' expected";
482 break;
483 case XML_ERR_ELEMCONTENT_NOT_STARTED:
484 errmsg = "ContentDecl : Name or '(' expected";
485 break;
486 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
487 errmsg = "ContentDecl : ',' '|' or ')' expected";
488 break;
489 case XML_ERR_PEREF_IN_INT_SUBSET:
490 errmsg =
491 "PEReference: forbidden within markup decl in internal subset";
492 break;
493 case XML_ERR_GT_REQUIRED:
494 errmsg = "expected '>'";
495 break;
496 case XML_ERR_CONDSEC_INVALID:
497 errmsg = "XML conditional section '[' expected";
498 break;
499 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
500 errmsg = "Content error in the external subset";
501 break;
502 case XML_ERR_CONDSEC_INVALID_KEYWORD:
503 errmsg =
504 "conditional section INCLUDE or IGNORE keyword expected";
505 break;
506 case XML_ERR_CONDSEC_NOT_FINISHED:
507 errmsg = "XML conditional section not closed";
508 break;
509 case XML_ERR_XMLDECL_NOT_STARTED:
510 errmsg = "Text declaration '<?xml' required";
511 break;
512 case XML_ERR_XMLDECL_NOT_FINISHED:
513 errmsg = "parsing XML declaration: '?>' expected";
514 break;
515 case XML_ERR_EXT_ENTITY_STANDALONE:
516 errmsg = "external parsed entities cannot be standalone";
517 break;
518 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
519 errmsg = "EntityRef: expecting ';'";
520 break;
521 case XML_ERR_DOCTYPE_NOT_FINISHED:
522 errmsg = "DOCTYPE improperly terminated";
523 break;
524 case XML_ERR_LTSLASH_REQUIRED:
525 errmsg = "EndTag: '</' not found";
526 break;
527 case XML_ERR_EQUAL_REQUIRED:
528 errmsg = "expected '='";
529 break;
530 case XML_ERR_STRING_NOT_CLOSED:
531 errmsg = "String not closed expecting \" or '";
532 break;
533 case XML_ERR_STRING_NOT_STARTED:
534 errmsg = "String not started expecting ' or \"";
535 break;
536 case XML_ERR_ENCODING_NAME:
537 errmsg = "Invalid XML encoding name";
538 break;
539 case XML_ERR_STANDALONE_VALUE:
540 errmsg = "standalone accepts only 'yes' or 'no'";
541 break;
542 case XML_ERR_DOCUMENT_EMPTY:
543 errmsg = "Document is empty";
544 break;
545 case XML_ERR_DOCUMENT_END:
546 errmsg = "Extra content at the end of the document";
547 break;
548 case XML_ERR_NOT_WELL_BALANCED:
549 errmsg = "chunk is not well balanced";
550 break;
551 case XML_ERR_EXTRA_CONTENT:
552 errmsg = "extra content at the end of well balanced chunk";
553 break;
554 case XML_ERR_VERSION_MISSING:
555 errmsg = "Malformed declaration expecting version";
556 break;
557 case XML_ERR_NAME_TOO_LONG:
558 errmsg = "Name too long use XML_PARSE_HUGE option";
559 break;
560 #if 0
561 case:
562 errmsg = "";
563 break;
564 #endif
565 default:
566 errmsg = "Unregistered error message";
567 }
568 if (ctxt != NULL)
569 ctxt->errNo = error;
570 if (info == NULL) {
571 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
572 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
573 errmsg);
574 } else {
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
576 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
577 errmsg, info);
578 }
579 if (ctxt != NULL) {
580 ctxt->wellFormed = 0;
581 if (ctxt->recovery == 0)
582 ctxt->disableSAX = 1;
583 }
584 }
585
586 /**
587 * xmlFatalErrMsg:
588 * @ctxt: an XML parser context
589 * @error: the error number
590 * @msg: the error message
591 *
592 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
593 */
594 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)595 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg)
597 {
598 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
599 (ctxt->instate == XML_PARSER_EOF))
600 return;
601 if (ctxt != NULL)
602 ctxt->errNo = error;
603 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
604 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
610 }
611
612 /**
613 * xmlWarningMsg:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: extra data
618 * @str2: extra data
619 *
620 * Handle a warning.
621 */
622 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)623 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624 const char *msg, const xmlChar *str1, const xmlChar *str2)
625 {
626 xmlStructuredErrorFunc schannel = NULL;
627
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
632 (ctxt->sax->initialized == XML_SAX2_MAGIC))
633 schannel = ctxt->sax->serror;
634 if (ctxt != NULL) {
635 __xmlRaiseError(schannel,
636 (ctxt->sax) ? ctxt->sax->warning : NULL,
637 ctxt->userData,
638 ctxt, NULL, XML_FROM_PARSER, error,
639 XML_ERR_WARNING, NULL, 0,
640 (const char *) str1, (const char *) str2, NULL, 0, 0,
641 msg, (const char *) str1, (const char *) str2);
642 } else {
643 __xmlRaiseError(schannel, NULL, NULL,
644 ctxt, NULL, XML_FROM_PARSER, error,
645 XML_ERR_WARNING, NULL, 0,
646 (const char *) str1, (const char *) str2, NULL, 0, 0,
647 msg, (const char *) str1, (const char *) str2);
648 }
649 }
650
651 /**
652 * xmlValidityError:
653 * @ctxt: an XML parser context
654 * @error: the error number
655 * @msg: the error message
656 * @str1: extra data
657 *
658 * Handle a validity error.
659 */
660 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)661 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
662 const char *msg, const xmlChar *str1, const xmlChar *str2)
663 {
664 xmlStructuredErrorFunc schannel = NULL;
665
666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667 (ctxt->instate == XML_PARSER_EOF))
668 return;
669 if (ctxt != NULL) {
670 ctxt->errNo = error;
671 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
672 schannel = ctxt->sax->serror;
673 }
674 if (ctxt != NULL) {
675 __xmlRaiseError(schannel,
676 ctxt->vctxt.error, ctxt->vctxt.userData,
677 ctxt, NULL, XML_FROM_DTD, error,
678 XML_ERR_ERROR, NULL, 0, (const char *) str1,
679 (const char *) str2, NULL, 0, 0,
680 msg, (const char *) str1, (const char *) str2);
681 ctxt->valid = 0;
682 } else {
683 __xmlRaiseError(schannel, NULL, NULL,
684 ctxt, NULL, XML_FROM_DTD, error,
685 XML_ERR_ERROR, NULL, 0, (const char *) str1,
686 (const char *) str2, NULL, 0, 0,
687 msg, (const char *) str1, (const char *) str2);
688 }
689 }
690
691 /**
692 * xmlFatalErrMsgInt:
693 * @ctxt: an XML parser context
694 * @error: the error number
695 * @msg: the error message
696 * @val: an integer value
697 *
698 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
699 */
700 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)701 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
702 const char *msg, int val)
703 {
704 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
705 (ctxt->instate == XML_PARSER_EOF))
706 return;
707 if (ctxt != NULL)
708 ctxt->errNo = error;
709 __xmlRaiseError(NULL, NULL, NULL,
710 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
711 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
712 if (ctxt != NULL) {
713 ctxt->wellFormed = 0;
714 if (ctxt->recovery == 0)
715 ctxt->disableSAX = 1;
716 }
717 }
718
719 /**
720 * xmlFatalErrMsgStrIntStr:
721 * @ctxt: an XML parser context
722 * @error: the error number
723 * @msg: the error message
724 * @str1: an string info
725 * @val: an integer value
726 * @str2: an string info
727 *
728 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
729 */
730 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)731 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
732 const char *msg, const xmlChar *str1, int val,
733 const xmlChar *str2)
734 {
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL,
741 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) str1, (const char *) str2,
743 NULL, val, 0, msg, str1, val, str2);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749 }
750
751 /**
752 * xmlFatalErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
759 */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763 {
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_FATAL,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773 if (ctxt != NULL) {
774 ctxt->wellFormed = 0;
775 if (ctxt->recovery == 0)
776 ctxt->disableSAX = 1;
777 }
778 }
779
780 /**
781 * xmlErrMsgStr:
782 * @ctxt: an XML parser context
783 * @error: the error number
784 * @msg: the error message
785 * @val: a string value
786 *
787 * Handle a non fatal parser error
788 */
789 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)790 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
791 const char *msg, const xmlChar * val)
792 {
793 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
794 (ctxt->instate == XML_PARSER_EOF))
795 return;
796 if (ctxt != NULL)
797 ctxt->errNo = error;
798 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
799 XML_FROM_PARSER, error, XML_ERR_ERROR,
800 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
801 val);
802 }
803
804 /**
805 * xmlNsErr:
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
813 */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819 {
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 if (ctxt != NULL)
824 ctxt->errNo = error;
825 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
826 XML_ERR_ERROR, NULL, 0, (const char *) info1,
827 (const char *) info2, (const char *) info3, 0, 0, msg,
828 info1, info2, info3);
829 if (ctxt != NULL)
830 ctxt->nsWellFormed = 0;
831 }
832
833 /**
834 * xmlNsWarn
835 * @ctxt: an XML parser context
836 * @error: the error number
837 * @msg: the message
838 * @info1: extra information string
839 * @info2: extra information string
840 *
841 * Handle a namespace warning error
842 */
843 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)844 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
845 const char *msg,
846 const xmlChar * info1, const xmlChar * info2,
847 const xmlChar * info3)
848 {
849 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
850 (ctxt->instate == XML_PARSER_EOF))
851 return;
852 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
853 XML_ERR_WARNING, NULL, 0, (const char *) info1,
854 (const char *) info2, (const char *) info3, 0, 0, msg,
855 info1, info2, info3);
856 }
857
858 /************************************************************************
859 * *
860 * Library wide options *
861 * *
862 ************************************************************************/
863
864 /**
865 * xmlHasFeature:
866 * @feature: the feature to be examined
867 *
868 * Examines if the library has been compiled with a given feature.
869 *
870 * Returns a non-zero value if the feature exist, otherwise zero.
871 * Returns zero (0) if the feature does not exist or an unknown
872 * unknown feature is requested, non-zero otherwise.
873 */
874 int
xmlHasFeature(xmlFeature feature)875 xmlHasFeature(xmlFeature feature)
876 {
877 switch (feature) {
878 case XML_WITH_THREAD:
879 #ifdef LIBXML_THREAD_ENABLED
880 return(1);
881 #else
882 return(0);
883 #endif
884 case XML_WITH_TREE:
885 #ifdef LIBXML_TREE_ENABLED
886 return(1);
887 #else
888 return(0);
889 #endif
890 case XML_WITH_OUTPUT:
891 #ifdef LIBXML_OUTPUT_ENABLED
892 return(1);
893 #else
894 return(0);
895 #endif
896 case XML_WITH_PUSH:
897 #ifdef LIBXML_PUSH_ENABLED
898 return(1);
899 #else
900 return(0);
901 #endif
902 case XML_WITH_READER:
903 #ifdef LIBXML_READER_ENABLED
904 return(1);
905 #else
906 return(0);
907 #endif
908 case XML_WITH_PATTERN:
909 #ifdef LIBXML_PATTERN_ENABLED
910 return(1);
911 #else
912 return(0);
913 #endif
914 case XML_WITH_WRITER:
915 #ifdef LIBXML_WRITER_ENABLED
916 return(1);
917 #else
918 return(0);
919 #endif
920 case XML_WITH_SAX1:
921 #ifdef LIBXML_SAX1_ENABLED
922 return(1);
923 #else
924 return(0);
925 #endif
926 case XML_WITH_FTP:
927 #ifdef LIBXML_FTP_ENABLED
928 return(1);
929 #else
930 return(0);
931 #endif
932 case XML_WITH_HTTP:
933 #ifdef LIBXML_HTTP_ENABLED
934 return(1);
935 #else
936 return(0);
937 #endif
938 case XML_WITH_VALID:
939 #ifdef LIBXML_VALID_ENABLED
940 return(1);
941 #else
942 return(0);
943 #endif
944 case XML_WITH_HTML:
945 #ifdef LIBXML_HTML_ENABLED
946 return(1);
947 #else
948 return(0);
949 #endif
950 case XML_WITH_LEGACY:
951 #ifdef LIBXML_LEGACY_ENABLED
952 return(1);
953 #else
954 return(0);
955 #endif
956 case XML_WITH_C14N:
957 #ifdef LIBXML_C14N_ENABLED
958 return(1);
959 #else
960 return(0);
961 #endif
962 case XML_WITH_CATALOG:
963 #ifdef LIBXML_CATALOG_ENABLED
964 return(1);
965 #else
966 return(0);
967 #endif
968 case XML_WITH_XPATH:
969 #ifdef LIBXML_XPATH_ENABLED
970 return(1);
971 #else
972 return(0);
973 #endif
974 case XML_WITH_XPTR:
975 #ifdef LIBXML_XPTR_ENABLED
976 return(1);
977 #else
978 return(0);
979 #endif
980 case XML_WITH_XINCLUDE:
981 #ifdef LIBXML_XINCLUDE_ENABLED
982 return(1);
983 #else
984 return(0);
985 #endif
986 case XML_WITH_ICONV:
987 #ifdef LIBXML_ICONV_ENABLED
988 return(1);
989 #else
990 return(0);
991 #endif
992 case XML_WITH_ISO8859X:
993 #ifdef LIBXML_ISO8859X_ENABLED
994 return(1);
995 #else
996 return(0);
997 #endif
998 case XML_WITH_UNICODE:
999 #ifdef LIBXML_UNICODE_ENABLED
1000 return(1);
1001 #else
1002 return(0);
1003 #endif
1004 case XML_WITH_REGEXP:
1005 #ifdef LIBXML_REGEXP_ENABLED
1006 return(1);
1007 #else
1008 return(0);
1009 #endif
1010 case XML_WITH_AUTOMATA:
1011 #ifdef LIBXML_AUTOMATA_ENABLED
1012 return(1);
1013 #else
1014 return(0);
1015 #endif
1016 case XML_WITH_EXPR:
1017 #ifdef LIBXML_EXPR_ENABLED
1018 return(1);
1019 #else
1020 return(0);
1021 #endif
1022 case XML_WITH_SCHEMAS:
1023 #ifdef LIBXML_SCHEMAS_ENABLED
1024 return(1);
1025 #else
1026 return(0);
1027 #endif
1028 case XML_WITH_SCHEMATRON:
1029 #ifdef LIBXML_SCHEMATRON_ENABLED
1030 return(1);
1031 #else
1032 return(0);
1033 #endif
1034 case XML_WITH_MODULES:
1035 #ifdef LIBXML_MODULES_ENABLED
1036 return(1);
1037 #else
1038 return(0);
1039 #endif
1040 case XML_WITH_DEBUG:
1041 #ifdef LIBXML_DEBUG_ENABLED
1042 return(1);
1043 #else
1044 return(0);
1045 #endif
1046 case XML_WITH_DEBUG_MEM:
1047 #ifdef DEBUG_MEMORY_LOCATION
1048 return(1);
1049 #else
1050 return(0);
1051 #endif
1052 case XML_WITH_DEBUG_RUN:
1053 #ifdef LIBXML_DEBUG_RUNTIME
1054 return(1);
1055 #else
1056 return(0);
1057 #endif
1058 case XML_WITH_ZLIB:
1059 #ifdef LIBXML_ZLIB_ENABLED
1060 return(1);
1061 #else
1062 return(0);
1063 #endif
1064 case XML_WITH_LZMA:
1065 #ifdef LIBXML_LZMA_ENABLED
1066 return(1);
1067 #else
1068 return(0);
1069 #endif
1070 case XML_WITH_ICU:
1071 #ifdef LIBXML_ICU_ENABLED
1072 return(1);
1073 #else
1074 return(0);
1075 #endif
1076 default:
1077 break;
1078 }
1079 return(0);
1080 }
1081
1082 /************************************************************************
1083 * *
1084 * SAX2 defaulted attributes handling *
1085 * *
1086 ************************************************************************/
1087
1088 /**
1089 * xmlDetectSAX2:
1090 * @ctxt: an XML parser context
1091 *
1092 * Do the SAX2 detection and specific intialization
1093 */
1094 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1095 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1096 if (ctxt == NULL) return;
1097 #ifdef LIBXML_SAX1_ENABLED
1098 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1099 ((ctxt->sax->startElementNs != NULL) ||
1100 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1101 #else
1102 ctxt->sax2 = 1;
1103 #endif /* LIBXML_SAX1_ENABLED */
1104
1105 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1106 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1107 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1108 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1109 (ctxt->str_xml_ns == NULL)) {
1110 xmlErrMemory(ctxt, NULL);
1111 }
1112 }
1113
1114 typedef struct _xmlDefAttrs xmlDefAttrs;
1115 typedef xmlDefAttrs *xmlDefAttrsPtr;
1116 struct _xmlDefAttrs {
1117 int nbAttrs; /* number of defaulted attributes on that element */
1118 int maxAttrs; /* the size of the array */
1119 #if __STDC_VERSION__ >= 199901L && !defined __HP_cc
1120 /* Using a C99 flexible array member avoids UBSan errors. */
1121 const xmlChar *values[]; /* array of localname/prefix/values/external */
1122 #else
1123 const xmlChar *values[5];
1124 #endif
1125 };
1126
1127 /**
1128 * xmlAttrNormalizeSpace:
1129 * @src: the source string
1130 * @dst: the target string
1131 *
1132 * Normalize the space in non CDATA attribute values:
1133 * If the attribute type is not CDATA, then the XML processor MUST further
1134 * process the normalized attribute value by discarding any leading and
1135 * trailing space (#x20) characters, and by replacing sequences of space
1136 * (#x20) characters by a single space (#x20) character.
1137 * Note that the size of dst need to be at least src, and if one doesn't need
1138 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1139 * passing src as dst is just fine.
1140 *
1141 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1142 * is needed.
1143 */
1144 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1145 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1146 {
1147 if ((src == NULL) || (dst == NULL))
1148 return(NULL);
1149
1150 while (*src == 0x20) src++;
1151 while (*src != 0) {
1152 if (*src == 0x20) {
1153 while (*src == 0x20) src++;
1154 if (*src != 0)
1155 *dst++ = 0x20;
1156 } else {
1157 *dst++ = *src++;
1158 }
1159 }
1160 *dst = 0;
1161 if (dst == src)
1162 return(NULL);
1163 return(dst);
1164 }
1165
1166 /**
1167 * xmlAttrNormalizeSpace2:
1168 * @src: the source string
1169 *
1170 * Normalize the space in non CDATA attribute values, a slightly more complex
1171 * front end to avoid allocation problems when running on attribute values
1172 * coming from the input.
1173 *
1174 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1175 * is needed.
1176 */
1177 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1178 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1179 {
1180 int i;
1181 int remove_head = 0;
1182 int need_realloc = 0;
1183 const xmlChar *cur;
1184
1185 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1186 return(NULL);
1187 i = *len;
1188 if (i <= 0)
1189 return(NULL);
1190
1191 cur = src;
1192 while (*cur == 0x20) {
1193 cur++;
1194 remove_head++;
1195 }
1196 while (*cur != 0) {
1197 if (*cur == 0x20) {
1198 cur++;
1199 if ((*cur == 0x20) || (*cur == 0)) {
1200 need_realloc = 1;
1201 break;
1202 }
1203 } else
1204 cur++;
1205 }
1206 if (need_realloc) {
1207 xmlChar *ret;
1208
1209 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1210 if (ret == NULL) {
1211 xmlErrMemory(ctxt, NULL);
1212 return(NULL);
1213 }
1214 xmlAttrNormalizeSpace(ret, ret);
1215 *len = (int) strlen((const char *)ret);
1216 return(ret);
1217 } else if (remove_head) {
1218 *len -= remove_head;
1219 memmove(src, src + remove_head, 1 + *len);
1220 return(src);
1221 }
1222 return(NULL);
1223 }
1224
1225 /**
1226 * xmlAddDefAttrs:
1227 * @ctxt: an XML parser context
1228 * @fullname: the element fullname
1229 * @fullattr: the attribute fullname
1230 * @value: the attribute value
1231 *
1232 * Add a defaulted attribute for an element
1233 */
1234 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1235 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1236 const xmlChar *fullname,
1237 const xmlChar *fullattr,
1238 const xmlChar *value) {
1239 xmlDefAttrsPtr defaults;
1240 int len;
1241 const xmlChar *name;
1242 const xmlChar *prefix;
1243
1244 /*
1245 * Allows to detect attribute redefinitions
1246 */
1247 if (ctxt->attsSpecial != NULL) {
1248 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1249 return;
1250 }
1251
1252 if (ctxt->attsDefault == NULL) {
1253 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1254 if (ctxt->attsDefault == NULL)
1255 goto mem_error;
1256 }
1257
1258 /*
1259 * split the element name into prefix:localname , the string found
1260 * are within the DTD and then not associated to namespace names.
1261 */
1262 name = xmlSplitQName3(fullname, &len);
1263 if (name == NULL) {
1264 name = xmlDictLookup(ctxt->dict, fullname, -1);
1265 prefix = NULL;
1266 } else {
1267 name = xmlDictLookup(ctxt->dict, name, -1);
1268 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1269 }
1270
1271 /*
1272 * make sure there is some storage
1273 */
1274 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1275 if (defaults == NULL) {
1276 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1277 (4 * 5) * sizeof(const xmlChar *));
1278 if (defaults == NULL)
1279 goto mem_error;
1280 defaults->nbAttrs = 0;
1281 defaults->maxAttrs = 4;
1282 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1283 defaults, NULL) < 0) {
1284 xmlFree(defaults);
1285 goto mem_error;
1286 }
1287 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1288 xmlDefAttrsPtr temp;
1289
1290 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1291 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1292 if (temp == NULL)
1293 goto mem_error;
1294 defaults = temp;
1295 defaults->maxAttrs *= 2;
1296 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 defaults, NULL) < 0) {
1298 xmlFree(defaults);
1299 goto mem_error;
1300 }
1301 }
1302
1303 /*
1304 * Split the element name into prefix:localname , the string found
1305 * are within the DTD and hen not associated to namespace names.
1306 */
1307 name = xmlSplitQName3(fullattr, &len);
1308 if (name == NULL) {
1309 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1310 prefix = NULL;
1311 } else {
1312 name = xmlDictLookup(ctxt->dict, name, -1);
1313 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1314 }
1315
1316 defaults->values[5 * defaults->nbAttrs] = name;
1317 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1318 /* intern the string and precompute the end */
1319 len = xmlStrlen(value);
1320 value = xmlDictLookup(ctxt->dict, value, len);
1321 defaults->values[5 * defaults->nbAttrs + 2] = value;
1322 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1323 if (ctxt->external)
1324 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1325 else
1326 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1327 defaults->nbAttrs++;
1328
1329 return;
1330
1331 mem_error:
1332 xmlErrMemory(ctxt, NULL);
1333 return;
1334 }
1335
1336 /**
1337 * xmlAddSpecialAttr:
1338 * @ctxt: an XML parser context
1339 * @fullname: the element fullname
1340 * @fullattr: the attribute fullname
1341 * @type: the attribute type
1342 *
1343 * Register this attribute type
1344 */
1345 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1346 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1347 const xmlChar *fullname,
1348 const xmlChar *fullattr,
1349 int type)
1350 {
1351 if (ctxt->attsSpecial == NULL) {
1352 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1353 if (ctxt->attsSpecial == NULL)
1354 goto mem_error;
1355 }
1356
1357 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1358 return;
1359
1360 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1361 (void *) (ptrdiff_t) type);
1362 return;
1363
1364 mem_error:
1365 xmlErrMemory(ctxt, NULL);
1366 return;
1367 }
1368
1369 /**
1370 * xmlCleanSpecialAttrCallback:
1371 *
1372 * Removes CDATA attributes from the special attribute table
1373 */
1374 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1375 xmlCleanSpecialAttrCallback(void *payload, void *data,
1376 const xmlChar *fullname, const xmlChar *fullattr,
1377 const xmlChar *unused ATTRIBUTE_UNUSED) {
1378 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1379
1380 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1381 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1382 }
1383 }
1384
1385 /**
1386 * xmlCleanSpecialAttr:
1387 * @ctxt: an XML parser context
1388 *
1389 * Trim the list of attributes defined to remove all those of type
1390 * CDATA as they are not special. This call should be done when finishing
1391 * to parse the DTD and before starting to parse the document root.
1392 */
1393 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1394 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1395 {
1396 if (ctxt->attsSpecial == NULL)
1397 return;
1398
1399 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1400
1401 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1402 xmlHashFree(ctxt->attsSpecial, NULL);
1403 ctxt->attsSpecial = NULL;
1404 }
1405 return;
1406 }
1407
1408 /**
1409 * xmlCheckLanguageID:
1410 * @lang: pointer to the string value
1411 *
1412 * Checks that the value conforms to the LanguageID production:
1413 *
1414 * NOTE: this is somewhat deprecated, those productions were removed from
1415 * the XML Second edition.
1416 *
1417 * [33] LanguageID ::= Langcode ('-' Subcode)*
1418 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1419 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1420 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1421 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1422 * [38] Subcode ::= ([a-z] | [A-Z])+
1423 *
1424 * The current REC reference the sucessors of RFC 1766, currently 5646
1425 *
1426 * http://www.rfc-editor.org/rfc/rfc5646.txt
1427 * langtag = language
1428 * ["-" script]
1429 * ["-" region]
1430 * *("-" variant)
1431 * *("-" extension)
1432 * ["-" privateuse]
1433 * language = 2*3ALPHA ; shortest ISO 639 code
1434 * ["-" extlang] ; sometimes followed by
1435 * ; extended language subtags
1436 * / 4ALPHA ; or reserved for future use
1437 * / 5*8ALPHA ; or registered language subtag
1438 *
1439 * extlang = 3ALPHA ; selected ISO 639 codes
1440 * *2("-" 3ALPHA) ; permanently reserved
1441 *
1442 * script = 4ALPHA ; ISO 15924 code
1443 *
1444 * region = 2ALPHA ; ISO 3166-1 code
1445 * / 3DIGIT ; UN M.49 code
1446 *
1447 * variant = 5*8alphanum ; registered variants
1448 * / (DIGIT 3alphanum)
1449 *
1450 * extension = singleton 1*("-" (2*8alphanum))
1451 *
1452 * ; Single alphanumerics
1453 * ; "x" reserved for private use
1454 * singleton = DIGIT ; 0 - 9
1455 * / %x41-57 ; A - W
1456 * / %x59-5A ; Y - Z
1457 * / %x61-77 ; a - w
1458 * / %x79-7A ; y - z
1459 *
1460 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1461 * The parser below doesn't try to cope with extension or privateuse
1462 * that could be added but that's not interoperable anyway
1463 *
1464 * Returns 1 if correct 0 otherwise
1465 **/
1466 int
xmlCheckLanguageID(const xmlChar * lang)1467 xmlCheckLanguageID(const xmlChar * lang)
1468 {
1469 const xmlChar *cur = lang, *nxt;
1470
1471 if (cur == NULL)
1472 return (0);
1473 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1474 ((cur[0] == 'I') && (cur[1] == '-')) ||
1475 ((cur[0] == 'x') && (cur[1] == '-')) ||
1476 ((cur[0] == 'X') && (cur[1] == '-'))) {
1477 /*
1478 * Still allow IANA code and user code which were coming
1479 * from the previous version of the XML-1.0 specification
1480 * it's deprecated but we should not fail
1481 */
1482 cur += 2;
1483 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1484 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1485 cur++;
1486 return(cur[0] == 0);
1487 }
1488 nxt = cur;
1489 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1490 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1491 nxt++;
1492 if (nxt - cur >= 4) {
1493 /*
1494 * Reserved
1495 */
1496 if ((nxt - cur > 8) || (nxt[0] != 0))
1497 return(0);
1498 return(1);
1499 }
1500 if (nxt - cur < 2)
1501 return(0);
1502 /* we got an ISO 639 code */
1503 if (nxt[0] == 0)
1504 return(1);
1505 if (nxt[0] != '-')
1506 return(0);
1507
1508 nxt++;
1509 cur = nxt;
1510 /* now we can have extlang or script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512 goto region_m49;
1513
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516 nxt++;
1517 if (nxt - cur == 4)
1518 goto script;
1519 if (nxt - cur == 2)
1520 goto region;
1521 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522 goto variant;
1523 if (nxt - cur != 3)
1524 return(0);
1525 /* we parsed an extlang */
1526 if (nxt[0] == 0)
1527 return(1);
1528 if (nxt[0] != '-')
1529 return(0);
1530
1531 nxt++;
1532 cur = nxt;
1533 /* now we can have script or region or variant */
1534 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1535 goto region_m49;
1536
1537 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1538 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1539 nxt++;
1540 if (nxt - cur == 2)
1541 goto region;
1542 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1543 goto variant;
1544 if (nxt - cur != 4)
1545 return(0);
1546 /* we parsed a script */
1547 script:
1548 if (nxt[0] == 0)
1549 return(1);
1550 if (nxt[0] != '-')
1551 return(0);
1552
1553 nxt++;
1554 cur = nxt;
1555 /* now we can have region or variant */
1556 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1557 goto region_m49;
1558
1559 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1560 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1561 nxt++;
1562
1563 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1564 goto variant;
1565 if (nxt - cur != 2)
1566 return(0);
1567 /* we parsed a region */
1568 region:
1569 if (nxt[0] == 0)
1570 return(1);
1571 if (nxt[0] != '-')
1572 return(0);
1573
1574 nxt++;
1575 cur = nxt;
1576 /* now we can just have a variant */
1577 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1578 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1579 nxt++;
1580
1581 if ((nxt - cur < 5) || (nxt - cur > 8))
1582 return(0);
1583
1584 /* we parsed a variant */
1585 variant:
1586 if (nxt[0] == 0)
1587 return(1);
1588 if (nxt[0] != '-')
1589 return(0);
1590 /* extensions and private use subtags not checked */
1591 return (1);
1592
1593 region_m49:
1594 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1595 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1596 nxt += 3;
1597 goto region;
1598 }
1599 return(0);
1600 }
1601
1602 /************************************************************************
1603 * *
1604 * Parser stacks related functions and macros *
1605 * *
1606 ************************************************************************/
1607
1608 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1609 const xmlChar ** str);
1610
1611 #ifdef SAX2
1612 /**
1613 * nsPush:
1614 * @ctxt: an XML parser context
1615 * @prefix: the namespace prefix or NULL
1616 * @URL: the namespace name
1617 *
1618 * Pushes a new parser namespace on top of the ns stack
1619 *
1620 * Returns -1 in case of error, -2 if the namespace should be discarded
1621 * and the index in the stack otherwise.
1622 */
1623 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1624 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1625 {
1626 if (ctxt->options & XML_PARSE_NSCLEAN) {
1627 int i;
1628 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1629 if (ctxt->nsTab[i] == prefix) {
1630 /* in scope */
1631 if (ctxt->nsTab[i + 1] == URL)
1632 return(-2);
1633 /* out of scope keep it */
1634 break;
1635 }
1636 }
1637 }
1638 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1639 ctxt->nsMax = 10;
1640 ctxt->nsNr = 0;
1641 ctxt->nsTab = (const xmlChar **)
1642 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1643 if (ctxt->nsTab == NULL) {
1644 xmlErrMemory(ctxt, NULL);
1645 ctxt->nsMax = 0;
1646 return (-1);
1647 }
1648 } else if (ctxt->nsNr >= ctxt->nsMax) {
1649 const xmlChar ** tmp;
1650 ctxt->nsMax *= 2;
1651 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1652 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1653 if (tmp == NULL) {
1654 xmlErrMemory(ctxt, NULL);
1655 ctxt->nsMax /= 2;
1656 return (-1);
1657 }
1658 ctxt->nsTab = tmp;
1659 }
1660 ctxt->nsTab[ctxt->nsNr++] = prefix;
1661 ctxt->nsTab[ctxt->nsNr++] = URL;
1662 return (ctxt->nsNr);
1663 }
1664 /**
1665 * nsPop:
1666 * @ctxt: an XML parser context
1667 * @nr: the number to pop
1668 *
1669 * Pops the top @nr parser prefix/namespace from the ns stack
1670 *
1671 * Returns the number of namespaces removed
1672 */
1673 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1674 nsPop(xmlParserCtxtPtr ctxt, int nr)
1675 {
1676 int i;
1677
1678 if (ctxt->nsTab == NULL) return(0);
1679 if (ctxt->nsNr < nr) {
1680 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1681 nr = ctxt->nsNr;
1682 }
1683 if (ctxt->nsNr <= 0)
1684 return (0);
1685
1686 for (i = 0;i < nr;i++) {
1687 ctxt->nsNr--;
1688 ctxt->nsTab[ctxt->nsNr] = NULL;
1689 }
1690 return(nr);
1691 }
1692 #endif
1693
1694 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1695 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1696 const xmlChar **atts;
1697 int *attallocs;
1698 int maxatts;
1699
1700 if (ctxt->atts == NULL) {
1701 maxatts = 55; /* allow for 10 attrs by default */
1702 atts = (const xmlChar **)
1703 xmlMalloc(maxatts * sizeof(xmlChar *));
1704 if (atts == NULL) goto mem_error;
1705 ctxt->atts = atts;
1706 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1707 if (attallocs == NULL) goto mem_error;
1708 ctxt->attallocs = attallocs;
1709 ctxt->maxatts = maxatts;
1710 } else if (nr + 5 > ctxt->maxatts) {
1711 maxatts = (nr + 5) * 2;
1712 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1713 maxatts * sizeof(const xmlChar *));
1714 if (atts == NULL) goto mem_error;
1715 ctxt->atts = atts;
1716 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1717 (maxatts / 5) * sizeof(int));
1718 if (attallocs == NULL) goto mem_error;
1719 ctxt->attallocs = attallocs;
1720 ctxt->maxatts = maxatts;
1721 }
1722 return(ctxt->maxatts);
1723 mem_error:
1724 xmlErrMemory(ctxt, NULL);
1725 return(-1);
1726 }
1727
1728 /**
1729 * inputPush:
1730 * @ctxt: an XML parser context
1731 * @value: the parser input
1732 *
1733 * Pushes a new parser input on top of the input stack
1734 *
1735 * Returns -1 in case of error, the index in the stack otherwise
1736 */
1737 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1738 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1739 {
1740 if ((ctxt == NULL) || (value == NULL))
1741 return(-1);
1742 if (ctxt->inputNr >= ctxt->inputMax) {
1743 ctxt->inputMax *= 2;
1744 ctxt->inputTab =
1745 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1746 ctxt->inputMax *
1747 sizeof(ctxt->inputTab[0]));
1748 if (ctxt->inputTab == NULL) {
1749 xmlErrMemory(ctxt, NULL);
1750 xmlFreeInputStream(value);
1751 ctxt->inputMax /= 2;
1752 value = NULL;
1753 return (-1);
1754 }
1755 }
1756 ctxt->inputTab[ctxt->inputNr] = value;
1757 ctxt->input = value;
1758 return (ctxt->inputNr++);
1759 }
1760 /**
1761 * inputPop:
1762 * @ctxt: an XML parser context
1763 *
1764 * Pops the top parser input from the input stack
1765 *
1766 * Returns the input just removed
1767 */
1768 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1769 inputPop(xmlParserCtxtPtr ctxt)
1770 {
1771 xmlParserInputPtr ret;
1772
1773 if (ctxt == NULL)
1774 return(NULL);
1775 if (ctxt->inputNr <= 0)
1776 return (NULL);
1777 ctxt->inputNr--;
1778 if (ctxt->inputNr > 0)
1779 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1780 else
1781 ctxt->input = NULL;
1782 ret = ctxt->inputTab[ctxt->inputNr];
1783 ctxt->inputTab[ctxt->inputNr] = NULL;
1784 return (ret);
1785 }
1786 /**
1787 * nodePush:
1788 * @ctxt: an XML parser context
1789 * @value: the element node
1790 *
1791 * Pushes a new element node on top of the node stack
1792 *
1793 * Returns -1 in case of error, the index in the stack otherwise
1794 */
1795 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1796 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1797 {
1798 if (ctxt == NULL) return(0);
1799 if (ctxt->nodeNr >= ctxt->nodeMax) {
1800 xmlNodePtr *tmp;
1801
1802 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1803 ctxt->nodeMax * 2 *
1804 sizeof(ctxt->nodeTab[0]));
1805 if (tmp == NULL) {
1806 xmlErrMemory(ctxt, NULL);
1807 return (-1);
1808 }
1809 ctxt->nodeTab = tmp;
1810 ctxt->nodeMax *= 2;
1811 }
1812 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1813 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1814 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1815 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1816 xmlParserMaxDepth);
1817 xmlHaltParser(ctxt);
1818 return(-1);
1819 }
1820 ctxt->nodeTab[ctxt->nodeNr] = value;
1821 ctxt->node = value;
1822 return (ctxt->nodeNr++);
1823 }
1824
1825 /**
1826 * nodePop:
1827 * @ctxt: an XML parser context
1828 *
1829 * Pops the top element node from the node stack
1830 *
1831 * Returns the node just removed
1832 */
1833 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1834 nodePop(xmlParserCtxtPtr ctxt)
1835 {
1836 xmlNodePtr ret;
1837
1838 if (ctxt == NULL) return(NULL);
1839 if (ctxt->nodeNr <= 0)
1840 return (NULL);
1841 ctxt->nodeNr--;
1842 if (ctxt->nodeNr > 0)
1843 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1844 else
1845 ctxt->node = NULL;
1846 ret = ctxt->nodeTab[ctxt->nodeNr];
1847 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1848 return (ret);
1849 }
1850
1851 #ifdef LIBXML_PUSH_ENABLED
1852 /**
1853 * nameNsPush:
1854 * @ctxt: an XML parser context
1855 * @value: the element name
1856 * @prefix: the element prefix
1857 * @URI: the element namespace name
1858 *
1859 * Pushes a new element name/prefix/URL on top of the name stack
1860 *
1861 * Returns -1 in case of error, the index in the stack otherwise
1862 */
1863 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1864 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1865 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1866 {
1867 if (ctxt->nameNr >= ctxt->nameMax) {
1868 const xmlChar * *tmp;
1869 void **tmp2;
1870 ctxt->nameMax *= 2;
1871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1872 ctxt->nameMax *
1873 sizeof(ctxt->nameTab[0]));
1874 if (tmp == NULL) {
1875 ctxt->nameMax /= 2;
1876 goto mem_error;
1877 }
1878 ctxt->nameTab = tmp;
1879 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1880 ctxt->nameMax * 3 *
1881 sizeof(ctxt->pushTab[0]));
1882 if (tmp2 == NULL) {
1883 ctxt->nameMax /= 2;
1884 goto mem_error;
1885 }
1886 ctxt->pushTab = tmp2;
1887 }
1888 ctxt->nameTab[ctxt->nameNr] = value;
1889 ctxt->name = value;
1890 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1891 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1892 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1893 return (ctxt->nameNr++);
1894 mem_error:
1895 xmlErrMemory(ctxt, NULL);
1896 return (-1);
1897 }
1898 /**
1899 * nameNsPop:
1900 * @ctxt: an XML parser context
1901 *
1902 * Pops the top element/prefix/URI name from the name stack
1903 *
1904 * Returns the name just removed
1905 */
1906 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1907 nameNsPop(xmlParserCtxtPtr ctxt)
1908 {
1909 const xmlChar *ret;
1910
1911 if (ctxt->nameNr <= 0)
1912 return (NULL);
1913 ctxt->nameNr--;
1914 if (ctxt->nameNr > 0)
1915 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1916 else
1917 ctxt->name = NULL;
1918 ret = ctxt->nameTab[ctxt->nameNr];
1919 ctxt->nameTab[ctxt->nameNr] = NULL;
1920 return (ret);
1921 }
1922 #endif /* LIBXML_PUSH_ENABLED */
1923
1924 /**
1925 * namePush:
1926 * @ctxt: an XML parser context
1927 * @value: the element name
1928 *
1929 * Pushes a new element name on top of the name stack
1930 *
1931 * Returns -1 in case of error, the index in the stack otherwise
1932 */
1933 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1934 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1935 {
1936 if (ctxt == NULL) return (-1);
1937
1938 if (ctxt->nameNr >= ctxt->nameMax) {
1939 const xmlChar * *tmp;
1940 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1941 ctxt->nameMax * 2 *
1942 sizeof(ctxt->nameTab[0]));
1943 if (tmp == NULL) {
1944 goto mem_error;
1945 }
1946 ctxt->nameTab = tmp;
1947 ctxt->nameMax *= 2;
1948 }
1949 ctxt->nameTab[ctxt->nameNr] = value;
1950 ctxt->name = value;
1951 return (ctxt->nameNr++);
1952 mem_error:
1953 xmlErrMemory(ctxt, NULL);
1954 return (-1);
1955 }
1956 /**
1957 * namePop:
1958 * @ctxt: an XML parser context
1959 *
1960 * Pops the top element name from the name stack
1961 *
1962 * Returns the name just removed
1963 */
1964 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1965 namePop(xmlParserCtxtPtr ctxt)
1966 {
1967 const xmlChar *ret;
1968
1969 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1970 return (NULL);
1971 ctxt->nameNr--;
1972 if (ctxt->nameNr > 0)
1973 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1974 else
1975 ctxt->name = NULL;
1976 ret = ctxt->nameTab[ctxt->nameNr];
1977 ctxt->nameTab[ctxt->nameNr] = NULL;
1978 return (ret);
1979 }
1980
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and makes ctxt->space point
 * to the new top entry. The stack is doubled when full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* the size is doubled up front and rolled back on failure */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1999
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first entry of the table when the stack gets
 * empty. The popped entry is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;
    ctxt->spaceNr = top;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2012
2013 /*
2014 * Macros for accessing the content. Those should be used only by the parser,
2015 * and not exported.
2016 *
2017 * Dirty macros, i.e. one often need to make assumption on the context to
2018 * use them
2019 *
2020 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2021 * To be used with extreme caution since operations consuming
2022 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2027 * RAW same as CUR but in the input buffer, bypass any token
2028 * extraction that may have been done
2029 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
2030 * to compare on ASCII based substring.
2031 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2032 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar; must only be used to skip 1 non-newline ASCII
 *           char within the parser. The macro takes no argument.
2035 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2036 *
2037 * NEXT Skip to the next character, this does the proper decoding
2038 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2039 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2040 * CUR_CHAR(l) returns the current unicode character (int), set l
2041 * to the number of xmlChars used for the encoding [0-5].
2042 * CUR_SCHAR same but operate on a string instead of the context
2043 * COPY_BUF copy the current unicode char to the target buffer, increment
2044 * the index
2045 * GROW, SHRINK handling of input buffers
2046 */
2047
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt, with a valid ctxt->input, to be in scope where they are expanded.
 */
/* value of the xmlChar at the current position */
#define RAW		(*ctxt->input->cur)
/* same expansion as RAW */
#define CUR		(*ctxt->input->cur)
/* xmlChar located val positions after the current one */
#define NXT(val)	ctxt->input->cur[(val)]
/* the current position itself */
#define CUR_PTR		ctxt->input->cur
/* the base of the current input */
#define BASE_PTR	ctxt->input->base
2053
/*
 * CMPn(s, c1, ..., cn): true if the n first bytes at s, read as unsigned
 * chars, are equal to c1 ... cn in that order. The comparison stops at
 * the first mismatch. The pointer argument s is parenthesized in the
 * expansion so that any pointer expression can be passed; note that it
 * is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
2071
/*
 * SKIP(val): advance the current position, the column and the character
 * count by val, then ask for more input if the new current char is 0.
 * Lines are not counted. Beware that val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2077
/*
 * SKIPL(val): skip val xmlChars one by one, keeping the line and column
 * of the input up to date when a newline is crossed, then ask for more
 * input if the new current char is 0.
 * val is parenthesized in the loop condition so that any integer
 * expression can be passed; it is evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2090
2091 #define SHRINK if ((ctxt->progressive == 0) && \
2092 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2093 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2094 xmlSHRINK (ctxt);
2095
xmlSHRINK(xmlParserCtxtPtr ctxt)2096 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2097 xmlParserInputShrink(ctxt->input);
2098 if (*ctxt->input->cur == 0)
2099 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2100 }
2101
2102 #define GROW if ((ctxt->progressive == 0) && \
2103 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2104 xmlGROW (ctxt);
2105
/*
 * xmlGROW: worker behind the GROW macro.
 *
 * Refuses to grow (fatal "Huge input lookup" error, parser halted) when
 * either the distance from the cursor to the end or from the base to the
 * cursor exceeds XML_MAX_LOOKUP_LIMIT, the input has a buffer whose read
 * callback is not xmlInputReadCallbackNop, and XML_PARSE_HUGE is not set.
 * Otherwise asks for INPUT_CHUNK more bytes, halts the parser if the
 * cursor ended up outside [base, end], and grows once more if the cursor
 * sits on a 0 byte.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    unsigned long ahead = in->end - in->cur;
    unsigned long behind = in->cur - in->base;
    int overLimit;

    overLimit = (ahead > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
                (behind > (unsigned long) XML_MAX_LOOKUP_LIMIT);

    if (overLimit &&
        (in->buf != NULL) &&
        (in->buf->readcallback != xmlInputReadCallbackNop) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(in, INPUT_CHUNK);

    if ((in->cur > in->end) || (in->cur < in->base)) {
        /* here the parser is halted before the error is raised */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if ((in->cur != NULL) && (*in->cur == 0))
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2129
/* SKIP_BLANKS: expression form of xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance through xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor of the current input by exactly one byte,
 * bumping the column and the consumed-character counter, and ask for
 * more input when the cursor lands on a 0 byte.  The line counter is
 * not touched.  Expands to a brace-enclosed block, not a do/while.
 */
#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->input->col++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }
2141
/*
 * NEXTL(l): step over the character under the cursor, whose encoded
 * length is @l bytes.  A '\n' bumps the line and resets the column to 1;
 * anything else bumps the column by one.  Neither the consumed-character
 * counter nor the input buffer is touched here.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
  } while (0)
2148
/*
 * CUR_CHAR(l): current character of the context via xmlCurrentChar();
 * @l must be an int lvalue, it is passed by address.
 * CUR_SCHAR(s, l): same for the string @s via xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character @v to buffer @b at index @i
 * and advance @i.  When @l is 1 the value is stored as a single xmlChar;
 * for any other @l it is written by xmlCopyCharMultiByte() and @i grows
 * by that function's return value.  No bounds checking is done here.
 *
 * Arguments are parenthesized and the body is wrapped in do/while (0) so
 * the macro behaves as one statement whatever expressions are passed.
 * @i must be an lvalue and is expanded more than once.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
  } while (0)
2155
2156 /**
2157 * xmlSkipBlankChars:
2158 * @ctxt: the XML parser context
2159 *
2160 * skip all blanks character found at that point in the input streams.
2161 * It pops up finished entities in the process if allowable at that point.
2162 *
2163 * Returns the number of space chars skipped
2164 */
2165
2166 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2167 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2168 int res = 0;
2169
2170 /*
2171 * It's Okay to use CUR/NEXT here since all the blanks are on
2172 * the ASCII range.
2173 */
2174 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2175 const xmlChar *cur;
2176 /*
2177 * if we are in the document content, go really fast
2178 */
2179 cur = ctxt->input->cur;
2180 while (IS_BLANK_CH(*cur)) {
2181 if (*cur == '\n') {
2182 ctxt->input->line++; ctxt->input->col = 1;
2183 } else {
2184 ctxt->input->col++;
2185 }
2186 cur++;
2187 res++;
2188 if (*cur == 0) {
2189 ctxt->input->cur = cur;
2190 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2191 cur = ctxt->input->cur;
2192 }
2193 }
2194 ctxt->input->cur = cur;
2195 } else {
2196 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2197
2198 while (1) {
2199 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2200 NEXT;
2201 } else if (CUR == '%') {
2202 /*
2203 * Need to handle support of entities branching here
2204 */
2205 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2206 break;
2207 xmlParsePEReference(ctxt);
2208 } else if (CUR == 0) {
2209 if (ctxt->inputNr <= 1)
2210 break;
2211 xmlPopInput(ctxt);
2212 } else {
2213 break;
2214 }
2215
2216 /*
2217 * Also increase the counter when entering or exiting a PERef.
2218 * The spec says: "When a parameter-entity reference is recognized
2219 * in the DTD and included, its replacement text MUST be enlarged
2220 * by the attachment of one leading and one following space (#x20)
2221 * character."
2222 */
2223 res++;
2224 }
2225 }
2226 return(res);
2227 }
2228
2229 /************************************************************************
2230 * *
2231 * Commodity functions to handle entities *
2232 * *
2233 ************************************************************************/
2234
2235 /**
2236 * xmlPopInput:
2237 * @ctxt: an XML parser context
2238 *
2239 * xmlPopInput: the current input pointed by ctxt->input came to an end
2240 * pop it and return the next char.
2241 *
2242 * Returns the current xmlChar in the parser context
2243 */
2244 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2245 xmlPopInput(xmlParserCtxtPtr ctxt) {
2246 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2247 if (xmlParserDebugEntities)
2248 xmlGenericError(xmlGenericErrorContext,
2249 "Popping input %d\n", ctxt->inputNr);
2250 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2251 (ctxt->instate != XML_PARSER_EOF))
2252 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2253 "Unfinished entity outside the DTD");
2254 xmlFreeInputStream(inputPop(ctxt));
2255 if (*ctxt->input->cur == 0)
2256 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2257 return(CUR);
2258 }
2259
2260 /**
2261 * xmlPushInput:
2262 * @ctxt: an XML parser context
2263 * @input: an XML parser input fragment (entity, XML fragment ...).
2264 *
2265 * xmlPushInput: switch to a new input stream which is stacked on top
2266 * of the previous one(s).
2267 * Returns -1 in case of error or the index in the input stack
2268 */
2269 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2270 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2271 int ret;
2272 if (input == NULL) return(-1);
2273
2274 if (xmlParserDebugEntities) {
2275 if ((ctxt->input != NULL) && (ctxt->input->filename))
2276 xmlGenericError(xmlGenericErrorContext,
2277 "%s(%d): ", ctxt->input->filename,
2278 ctxt->input->line);
2279 xmlGenericError(xmlGenericErrorContext,
2280 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2281 }
2282 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2283 (ctxt->inputNr > 1024)) {
2284 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2285 while (ctxt->inputNr > 1)
2286 xmlFreeInputStream(inputPop(ctxt));
2287 return(-1);
2288 }
2289 ret = inputPush(ctxt, input);
2290 if (ctxt->instate == XML_PARSER_EOF)
2291 return(-1);
2292 GROW;
2293 return(ret);
2294 }
2295
2296 /**
2297 * xmlParseCharRef:
2298 * @ctxt: an XML parser context
2299 *
2300 * parse Reference declarations
2301 *
2302 * [66] CharRef ::= '&#' [0-9]+ ';' |
2303 * '&#x' [0-9a-fA-F]+ ';'
2304 *
2305 * [ WFC: Legal Character ]
2306 * Characters referred to using character references must match the
2307 * production for Char.
2308 *
2309 * Returns the value parsed (as an int), 0 in case of error
2310 */
2311 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2312 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2313 unsigned int val = 0;
2314 int count = 0;
2315 unsigned int outofrange = 0;
2316
2317 /*
2318 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2319 */
2320 if ((RAW == '&') && (NXT(1) == '#') &&
2321 (NXT(2) == 'x')) {
2322 SKIP(3);
2323 GROW;
2324 while (RAW != ';') { /* loop blocked by count */
2325 if (count++ > 20) {
2326 count = 0;
2327 GROW;
2328 if (ctxt->instate == XML_PARSER_EOF)
2329 return(0);
2330 }
2331 if ((RAW >= '0') && (RAW <= '9'))
2332 val = val * 16 + (CUR - '0');
2333 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2334 val = val * 16 + (CUR - 'a') + 10;
2335 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2336 val = val * 16 + (CUR - 'A') + 10;
2337 else {
2338 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2339 val = 0;
2340 break;
2341 }
2342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
2345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 ctxt->input->col++;
2351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else if ((RAW == '&') && (NXT(1) == '#')) {
2355 SKIP(2);
2356 GROW;
2357 while (RAW != ';') { /* loop blocked by count */
2358 if (count++ > 20) {
2359 count = 0;
2360 GROW;
2361 if (ctxt->instate == XML_PARSER_EOF)
2362 return(0);
2363 }
2364 if ((RAW >= '0') && (RAW <= '9'))
2365 val = val * 10 + (CUR - '0');
2366 else {
2367 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2368 val = 0;
2369 break;
2370 }
2371 if (val > 0x10FFFF)
2372 outofrange = val;
2373
2374 NEXT;
2375 count++;
2376 }
2377 if (RAW == ';') {
2378 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2379 ctxt->input->col++;
2380 ctxt->nbChars ++;
2381 ctxt->input->cur++;
2382 }
2383 } else {
2384 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2385 }
2386
2387 /*
2388 * [ WFC: Legal Character ]
2389 * Characters referred to using character references must match the
2390 * production for Char.
2391 */
2392 if ((IS_CHAR(val) && (outofrange == 0))) {
2393 return(val);
2394 } else {
2395 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396 "xmlParseCharRef: invalid xmlChar value %d\n",
2397 val);
2398 }
2399 return(0);
2400 }
2401
2402 /**
2403 * xmlParseStringCharRef:
2404 * @ctxt: an XML parser context
2405 * @str: a pointer to an index in the string
2406 *
2407 * parse Reference declarations, variant parsing from a string rather
2408 * than an an input flow.
2409 *
2410 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411 * '&#x' [0-9a-fA-F]+ ';'
2412 *
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2416 *
2417 * Returns the value parsed (as an int), 0 in case of error, str will be
2418 * updated to the current value of the index
2419 */
2420 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2421 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422 const xmlChar *ptr;
2423 xmlChar cur;
2424 unsigned int val = 0;
2425 unsigned int outofrange = 0;
2426
2427 if ((str == NULL) || (*str == NULL)) return(0);
2428 ptr = *str;
2429 cur = *ptr;
2430 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2431 ptr += 3;
2432 cur = *ptr;
2433 while (cur != ';') { /* Non input consuming loop */
2434 if ((cur >= '0') && (cur <= '9'))
2435 val = val * 16 + (cur - '0');
2436 else if ((cur >= 'a') && (cur <= 'f'))
2437 val = val * 16 + (cur - 'a') + 10;
2438 else if ((cur >= 'A') && (cur <= 'F'))
2439 val = val * 16 + (cur - 'A') + 10;
2440 else {
2441 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2442 val = 0;
2443 break;
2444 }
2445 if (val > 0x10FFFF)
2446 outofrange = val;
2447
2448 ptr++;
2449 cur = *ptr;
2450 }
2451 if (cur == ';')
2452 ptr++;
2453 } else if ((cur == '&') && (ptr[1] == '#')){
2454 ptr += 2;
2455 cur = *ptr;
2456 while (cur != ';') { /* Non input consuming loops */
2457 if ((cur >= '0') && (cur <= '9'))
2458 val = val * 10 + (cur - '0');
2459 else {
2460 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2461 val = 0;
2462 break;
2463 }
2464 if (val > 0x10FFFF)
2465 outofrange = val;
2466
2467 ptr++;
2468 cur = *ptr;
2469 }
2470 if (cur == ';')
2471 ptr++;
2472 } else {
2473 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2474 return(0);
2475 }
2476 *str = ptr;
2477
2478 /*
2479 * [ WFC: Legal Character ]
2480 * Characters referred to using character references must match the
2481 * production for Char.
2482 */
2483 if ((IS_CHAR(val) && (outofrange == 0))) {
2484 return(val);
2485 } else {
2486 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2487 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2488 val);
2489 }
2490 return(0);
2491 }
2492
2493 /**
2494 * xmlParserHandlePEReference:
2495 * @ctxt: the parser context
2496 *
2497 * [69] PEReference ::= '%' Name ';'
2498 *
2499 * [ WFC: No Recursion ]
2500 * A parsed entity must not contain a recursive
2501 * reference to itself, either directly or indirectly.
2502 *
2503 * [ WFC: Entity Declared ]
2504 * In a document without any DTD, a document with only an internal DTD
2505 * subset which contains no parameter entity references, or a document
2506 * with "standalone='yes'", ... ... The declaration of a parameter
2507 * entity must precede any reference to it...
2508 *
2509 * [ VC: Entity Declared ]
2510 * In a document with an external subset or external parameter entities
2511 * with "standalone='no'", ... ... The declaration of a parameter entity
2512 * must precede any reference to it...
2513 *
2514 * [ WFC: In DTD ]
2515 * Parameter-entity references may only appear in the DTD.
2516 * NOTE: misleading but this is handled.
2517 *
2518 * A PEReference may have been detected in the current input stream
2519 * the handling is done accordingly to
2520 * http://www.w3.org/TR/REC-xml#entproc
2521 * i.e.
2522 * - Included in literal in entity values
2523 * - Included as Parameter Entity reference within DTDs
2524 */
2525 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2526 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2527 switch(ctxt->instate) {
2528 case XML_PARSER_CDATA_SECTION:
2529 return;
2530 case XML_PARSER_COMMENT:
2531 return;
2532 case XML_PARSER_START_TAG:
2533 return;
2534 case XML_PARSER_END_TAG:
2535 return;
2536 case XML_PARSER_EOF:
2537 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2538 return;
2539 case XML_PARSER_PROLOG:
2540 case XML_PARSER_START:
2541 case XML_PARSER_MISC:
2542 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2543 return;
2544 case XML_PARSER_ENTITY_DECL:
2545 case XML_PARSER_CONTENT:
2546 case XML_PARSER_ATTRIBUTE_VALUE:
2547 case XML_PARSER_PI:
2548 case XML_PARSER_SYSTEM_LITERAL:
2549 case XML_PARSER_PUBLIC_LITERAL:
2550 /* we just ignore it there */
2551 return;
2552 case XML_PARSER_EPILOG:
2553 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2554 return;
2555 case XML_PARSER_ENTITY_VALUE:
2556 /*
2557 * NOTE: in the case of entity values, we don't do the
2558 * substitution here since we need the literal
2559 * entity value to be able to save the internal
2560 * subset of the document.
2561 * This will be handled by xmlStringDecodeEntities
2562 */
2563 return;
2564 case XML_PARSER_DTD:
2565 /*
2566 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2567 * In the internal DTD subset, parameter-entity references
2568 * can occur only where markup declarations can occur, not
2569 * within markup declarations.
2570 * In that case this is handled in xmlParseMarkupDecl
2571 */
2572 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2573 return;
2574 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2575 return;
2576 break;
2577 case XML_PARSER_IGNORE:
2578 return;
2579 }
2580
2581 xmlParsePEReference(ctxt);
2582 }
2583
/*
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or when xmlRealloc() fails,
 * in which case @buffer and buffer##_size are left unchanged.
 * @n is parenthesized so that expression arguments add up as a whole.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + (n); \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}
2598
2599 /**
2600 * xmlStringLenDecodeEntities:
2601 * @ctxt: the parser context
2602 * @str: the input string
2603 * @len: the string length
2604 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2605 * @end: an end marker xmlChar, 0 if none
2606 * @end2: an end marker xmlChar, 0 if none
2607 * @end3: an end marker xmlChar, 0 if none
2608 *
2609 * Takes a entity string content and process to do the adequate substitutions.
2610 *
2611 * [67] Reference ::= EntityRef | CharRef
2612 *
2613 * [69] PEReference ::= '%' Name ';'
2614 *
2615 * Returns A newly allocated string with the substitution done. The caller
2616 * must deallocate it !
2617 */
2618 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2619 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2620 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2621 xmlChar *buffer = NULL;
2622 size_t buffer_size = 0;
2623 size_t nbchars = 0;
2624
2625 xmlChar *current = NULL;
2626 xmlChar *rep = NULL;
2627 const xmlChar *last;
2628 xmlEntityPtr ent;
2629 int c,l;
2630
2631 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2632 return(NULL);
2633 last = str + len;
2634
2635 if (((ctxt->depth > 40) &&
2636 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2637 (ctxt->depth > 1024)) {
2638 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2639 return(NULL);
2640 }
2641
2642 /*
2643 * allocate a translation buffer.
2644 */
2645 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2646 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2647 if (buffer == NULL) goto mem_error;
2648
2649 /*
2650 * OK loop until we reach one of the ending char or a size limit.
2651 * we are operating on already parsed values.
2652 */
2653 if (str < last)
2654 c = CUR_SCHAR(str, l);
2655 else
2656 c = 0;
2657 while ((c != 0) && (c != end) && /* non input consuming loop */
2658 (c != end2) && (c != end3)) {
2659
2660 if (c == 0) break;
2661 if ((c == '&') && (str[1] == '#')) {
2662 int val = xmlParseStringCharRef(ctxt, &str);
2663 if (val == 0)
2664 goto int_error;
2665 COPY_BUF(0,buffer,nbchars,val);
2666 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2667 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2668 }
2669 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2670 if (xmlParserDebugEntities)
2671 xmlGenericError(xmlGenericErrorContext,
2672 "String decoding Entity Reference: %.30s\n",
2673 str);
2674 ent = xmlParseStringEntityRef(ctxt, &str);
2675 xmlParserEntityCheck(ctxt, 0, ent, 0);
2676 if (ent != NULL)
2677 ctxt->nbentities += ent->checked / 2;
2678 if ((ent != NULL) &&
2679 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2680 if (ent->content != NULL) {
2681 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2682 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2683 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2684 }
2685 } else {
2686 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2687 "predefined entity has no content\n");
2688 goto int_error;
2689 }
2690 } else if ((ent != NULL) && (ent->content != NULL)) {
2691 ctxt->depth++;
2692 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2693 0, 0, 0);
2694 ctxt->depth--;
2695 if (rep == NULL)
2696 goto int_error;
2697
2698 current = rep;
2699 while (*current != 0) { /* non input consuming loop */
2700 buffer[nbchars++] = *current++;
2701 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2702 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2703 goto int_error;
2704 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2705 }
2706 }
2707 xmlFree(rep);
2708 rep = NULL;
2709 } else if (ent != NULL) {
2710 int i = xmlStrlen(ent->name);
2711 const xmlChar *cur = ent->name;
2712
2713 buffer[nbchars++] = '&';
2714 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2715 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2716 }
2717 for (;i > 0;i--)
2718 buffer[nbchars++] = *cur++;
2719 buffer[nbchars++] = ';';
2720 }
2721 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2722 if (xmlParserDebugEntities)
2723 xmlGenericError(xmlGenericErrorContext,
2724 "String decoding PE Reference: %.30s\n", str);
2725 ent = xmlParseStringPEReference(ctxt, &str);
2726 xmlParserEntityCheck(ctxt, 0, ent, 0);
2727 if (ent != NULL)
2728 ctxt->nbentities += ent->checked / 2;
2729 if (ent != NULL) {
2730 if (ent->content == NULL) {
2731 /*
2732 * Note: external parsed entities will not be loaded,
2733 * it is not required for a non-validating parser to
2734 * complete external PEreferences coming from the
2735 * internal subset
2736 */
2737 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2738 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2739 (ctxt->validate != 0)) {
2740 xmlLoadEntityContent(ctxt, ent);
2741 } else {
2742 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2743 "not validating will not read content for PE entity %s\n",
2744 ent->name, NULL);
2745 }
2746 }
2747 ctxt->depth++;
2748 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2749 0, 0, 0);
2750 ctxt->depth--;
2751 if (rep == NULL)
2752 goto int_error;
2753 current = rep;
2754 while (*current != 0) { /* non input consuming loop */
2755 buffer[nbchars++] = *current++;
2756 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2757 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2758 goto int_error;
2759 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2760 }
2761 }
2762 xmlFree(rep);
2763 rep = NULL;
2764 }
2765 } else {
2766 COPY_BUF(l,buffer,nbchars,c);
2767 str += l;
2768 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2769 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2770 }
2771 }
2772 if (str < last)
2773 c = CUR_SCHAR(str, l);
2774 else
2775 c = 0;
2776 }
2777 buffer[nbchars] = 0;
2778 return(buffer);
2779
2780 mem_error:
2781 xmlErrMemory(ctxt, NULL);
2782 int_error:
2783 if (rep != NULL)
2784 xmlFree(rep);
2785 if (buffer != NULL)
2786 xmlFree(buffer);
2787 return(NULL);
2788 }
2789
2790 /**
2791 * xmlStringDecodeEntities:
2792 * @ctxt: the parser context
2793 * @str: the input string
2794 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2795 * @end: an end marker xmlChar, 0 if none
2796 * @end2: an end marker xmlChar, 0 if none
2797 * @end3: an end marker xmlChar, 0 if none
2798 *
2799 * Takes a entity string content and process to do the adequate substitutions.
2800 *
2801 * [67] Reference ::= EntityRef | CharRef
2802 *
2803 * [69] PEReference ::= '%' Name ';'
2804 *
2805 * Returns A newly allocated string with the substitution done. The caller
2806 * must deallocate it !
2807 */
2808 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2809 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2810 xmlChar end, xmlChar end2, xmlChar end3) {
2811 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2812 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2813 end, end2, end3));
2814 }
2815
2816 /************************************************************************
2817 * *
2818 * Commodity functions, cleanup needed ? *
2819 * *
2820 ************************************************************************/
2821
2822 /**
2823 * areBlanks:
2824 * @ctxt: an XML parser context
2825 * @str: a xmlChar *
2826 * @len: the size of @str
2827 * @blank_chars: we know the chars are blanks
2828 *
2829 * Is this a sequence of blank chars that one can ignore ?
2830 *
2831 * Returns 1 if ignorable 0 otherwise.
2832 */
2833
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2834 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2835 int blank_chars) {
2836 int i, ret;
2837 xmlNodePtr lastChild;
2838
2839 /*
2840 * Don't spend time trying to differentiate them, the same callback is
2841 * used !
2842 */
2843 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2844 return(0);
2845
2846 /*
2847 * Check for xml:space value.
2848 */
2849 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2850 (*(ctxt->space) == -2))
2851 return(0);
2852
2853 /*
2854 * Check that the string is made of blanks
2855 */
2856 if (blank_chars == 0) {
2857 for (i = 0;i < len;i++)
2858 if (!(IS_BLANK_CH(str[i]))) return(0);
2859 }
2860
2861 /*
2862 * Look if the element is mixed content in the DTD if available
2863 */
2864 if (ctxt->node == NULL) return(0);
2865 if (ctxt->myDoc != NULL) {
2866 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2867 if (ret == 0) return(1);
2868 if (ret == 1) return(0);
2869 }
2870
2871 /*
2872 * Otherwise, heuristic :-\
2873 */
2874 if ((RAW != '<') && (RAW != 0xD)) return(0);
2875 if ((ctxt->node->children == NULL) &&
2876 (RAW == '<') && (NXT(1) == '/')) return(0);
2877
2878 lastChild = xmlGetLastChild(ctxt->node);
2879 if (lastChild == NULL) {
2880 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2881 (ctxt->node->content != NULL)) return(0);
2882 } else if (xmlNodeIsText(lastChild))
2883 return(0);
2884 else if ((ctxt->node->children != NULL) &&
2885 (xmlNodeIsText(ctxt->node->children)))
2886 return(0);
2887 return(1);
2888 }
2889
2890 /************************************************************************
2891 * *
2892 * Extra stuff for namespace support *
2893 * Relates to http://www.w3.org/TR/WD-xml-names *
2894 * *
2895 ************************************************************************/
2896
2897 /**
2898 * xmlSplitQName:
2899 * @ctxt: an XML parser context
2900 * @name: an XML parser context
2901 * @prefix: a xmlChar **
2902 *
2903 * parse an UTF8 encoded XML qualified name string
2904 *
2905 * [NS 5] QName ::= (Prefix ':')? LocalPart
2906 *
2907 * [NS 6] Prefix ::= NCName
2908 *
2909 * [NS 7] LocalPart ::= NCName
2910 *
2911 * Returns the local part, and prefix is updated
2912 * to get the Prefix if any.
2913 */
2914
2915 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2916 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2917 xmlChar buf[XML_MAX_NAMELEN + 5];
2918 xmlChar *buffer = NULL;
2919 int len = 0;
2920 int max = XML_MAX_NAMELEN;
2921 xmlChar *ret = NULL;
2922 const xmlChar *cur = name;
2923 int c;
2924
2925 if (prefix == NULL) return(NULL);
2926 *prefix = NULL;
2927
2928 if (cur == NULL) return(NULL);
2929
2930 #ifndef XML_XML_NAMESPACE
2931 /* xml: prefix is not really a namespace */
2932 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2933 (cur[2] == 'l') && (cur[3] == ':'))
2934 return(xmlStrdup(name));
2935 #endif
2936
2937 /* nasty but well=formed */
2938 if (cur[0] == ':')
2939 return(xmlStrdup(name));
2940
2941 c = *cur++;
2942 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2943 buf[len++] = c;
2944 c = *cur++;
2945 }
2946 if (len >= max) {
2947 /*
2948 * Okay someone managed to make a huge name, so he's ready to pay
2949 * for the processing speed.
2950 */
2951 max = len * 2;
2952
2953 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2954 if (buffer == NULL) {
2955 xmlErrMemory(ctxt, NULL);
2956 return(NULL);
2957 }
2958 memcpy(buffer, buf, len);
2959 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2960 if (len + 10 > max) {
2961 xmlChar *tmp;
2962
2963 max *= 2;
2964 tmp = (xmlChar *) xmlRealloc(buffer,
2965 max * sizeof(xmlChar));
2966 if (tmp == NULL) {
2967 xmlFree(buffer);
2968 xmlErrMemory(ctxt, NULL);
2969 return(NULL);
2970 }
2971 buffer = tmp;
2972 }
2973 buffer[len++] = c;
2974 c = *cur++;
2975 }
2976 buffer[len] = 0;
2977 }
2978
2979 if ((c == ':') && (*cur == 0)) {
2980 if (buffer != NULL)
2981 xmlFree(buffer);
2982 *prefix = NULL;
2983 return(xmlStrdup(name));
2984 }
2985
2986 if (buffer == NULL)
2987 ret = xmlStrndup(buf, len);
2988 else {
2989 ret = buffer;
2990 buffer = NULL;
2991 max = XML_MAX_NAMELEN;
2992 }
2993
2994
2995 if (c == ':') {
2996 c = *cur;
2997 *prefix = ret;
2998 if (c == 0) {
2999 return(xmlStrndup(BAD_CAST "", 0));
3000 }
3001 len = 0;
3002
3003 /*
3004 * Check that the first character is proper to start
3005 * a new name
3006 */
3007 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3008 ((c >= 0x41) && (c <= 0x5A)) ||
3009 (c == '_') || (c == ':'))) {
3010 int l;
3011 int first = CUR_SCHAR(cur, l);
3012
3013 if (!IS_LETTER(first) && (first != '_')) {
3014 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3015 "Name %s is not XML Namespace compliant\n",
3016 name);
3017 }
3018 }
3019 cur++;
3020
3021 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3022 buf[len++] = c;
3023 c = *cur++;
3024 }
3025 if (len >= max) {
3026 /*
3027 * Okay someone managed to make a huge name, so he's ready to pay
3028 * for the processing speed.
3029 */
3030 max = len * 2;
3031
3032 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3033 if (buffer == NULL) {
3034 xmlErrMemory(ctxt, NULL);
3035 return(NULL);
3036 }
3037 memcpy(buffer, buf, len);
3038 while (c != 0) { /* tested bigname2.xml */
3039 if (len + 10 > max) {
3040 xmlChar *tmp;
3041
3042 max *= 2;
3043 tmp = (xmlChar *) xmlRealloc(buffer,
3044 max * sizeof(xmlChar));
3045 if (tmp == NULL) {
3046 xmlErrMemory(ctxt, NULL);
3047 xmlFree(buffer);
3048 return(NULL);
3049 }
3050 buffer = tmp;
3051 }
3052 buffer[len++] = c;
3053 c = *cur++;
3054 }
3055 buffer[len] = 0;
3056 }
3057
3058 if (buffer == NULL)
3059 ret = xmlStrndup(buf, len);
3060 else {
3061 ret = buffer;
3062 }
3063 }
3064
3065 return(ret);
3066 }
3067
3068 /************************************************************************
3069 * *
3070 * The parser itself *
3071 * Relates to http://www.w3.org/TR/REC-xml *
3072 * *
3073 ************************************************************************/
3074
3075 /************************************************************************
3076 * *
3077 * Routines to parse Name, NCName and NmToken *
3078 * *
3079 ************************************************************************/
#ifdef DEBUG
/*
 * Per-routine call counters, only compiled in DEBUG builds.  They have
 * static storage duration and therefore start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3088
3089 /*
3090 * The two following functions are related to the change of accepted
3091 * characters for Name and NmToken in the Revision 5 of XML-1.0
3092 * They correspond to the modified production [4] and the new production [4a]
3093 * changes in that revision. Also note that the macros used for the
3094 * productions Letter, Digit, CombiningChar and Extender are not needed
3095 * anymore.
3096 * We still keep compatibility to pre-revision5 parsing semantic if the
3097 * new XML_PARSE_OLD10 option is given to the parser.
3098 */
3099 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3100 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3101 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3102 /*
3103 * Use the new checks of production [4] [4a] amd [5] of the
3104 * Update 5 of XML-1.0
3105 */
3106 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3107 (((c >= 'a') && (c <= 'z')) ||
3108 ((c >= 'A') && (c <= 'Z')) ||
3109 (c == '_') || (c == ':') ||
3110 ((c >= 0xC0) && (c <= 0xD6)) ||
3111 ((c >= 0xD8) && (c <= 0xF6)) ||
3112 ((c >= 0xF8) && (c <= 0x2FF)) ||
3113 ((c >= 0x370) && (c <= 0x37D)) ||
3114 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3115 ((c >= 0x200C) && (c <= 0x200D)) ||
3116 ((c >= 0x2070) && (c <= 0x218F)) ||
3117 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3118 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3119 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3120 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3121 ((c >= 0x10000) && (c <= 0xEFFFF))))
3122 return(1);
3123 } else {
3124 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3125 return(1);
3126 }
3127 return(0);
3128 }
3129
3130 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3131 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3132 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3133 /*
3134 * Use the new checks of production [4] [4a] amd [5] of the
3135 * Update 5 of XML-1.0
3136 */
3137 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3138 (((c >= 'a') && (c <= 'z')) ||
3139 ((c >= 'A') && (c <= 'Z')) ||
3140 ((c >= '0') && (c <= '9')) || /* !start */
3141 (c == '_') || (c == ':') ||
3142 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3143 ((c >= 0xC0) && (c <= 0xD6)) ||
3144 ((c >= 0xD8) && (c <= 0xF6)) ||
3145 ((c >= 0xF8) && (c <= 0x2FF)) ||
3146 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3147 ((c >= 0x370) && (c <= 0x37D)) ||
3148 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3149 ((c >= 0x200C) && (c <= 0x200D)) ||
3150 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3151 ((c >= 0x2070) && (c <= 0x218F)) ||
3152 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3153 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3154 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3155 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3156 ((c >= 0x10000) && (c <= 0xEFFFF))))
3157 return(1);
3158 } else {
3159 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3160 (c == '.') || (c == '-') ||
3161 (c == '_') || (c == ':') ||
3162 (IS_COMBINING(c)) ||
3163 (IS_EXTENDER(c)))
3164 return(1);
3165 }
3166 return(0);
3167 }
3168
3169 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3170 int *len, int *alloc, int normalize);
3171
3172 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3173 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3174 int len = 0, l;
3175 int c;
3176 int count = 0;
3177
3178 #ifdef DEBUG
3179 nbParseNameComplex++;
3180 #endif
3181
3182 /*
3183 * Handler for more complex cases
3184 */
3185 GROW;
3186 if (ctxt->instate == XML_PARSER_EOF)
3187 return(NULL);
3188 c = CUR_CHAR(l);
3189 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3190 /*
3191 * Use the new checks of production [4] [4a] amd [5] of the
3192 * Update 5 of XML-1.0
3193 */
3194 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3195 (!(((c >= 'a') && (c <= 'z')) ||
3196 ((c >= 'A') && (c <= 'Z')) ||
3197 (c == '_') || (c == ':') ||
3198 ((c >= 0xC0) && (c <= 0xD6)) ||
3199 ((c >= 0xD8) && (c <= 0xF6)) ||
3200 ((c >= 0xF8) && (c <= 0x2FF)) ||
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x2070) && (c <= 0x218F)) ||
3205 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3206 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3207 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3208 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3209 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3210 return(NULL);
3211 }
3212 len += l;
3213 NEXTL(l);
3214 c = CUR_CHAR(l);
3215 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3216 (((c >= 'a') && (c <= 'z')) ||
3217 ((c >= 'A') && (c <= 'Z')) ||
3218 ((c >= '0') && (c <= '9')) || /* !start */
3219 (c == '_') || (c == ':') ||
3220 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3221 ((c >= 0xC0) && (c <= 0xD6)) ||
3222 ((c >= 0xD8) && (c <= 0xF6)) ||
3223 ((c >= 0xF8) && (c <= 0x2FF)) ||
3224 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3225 ((c >= 0x370) && (c <= 0x37D)) ||
3226 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3227 ((c >= 0x200C) && (c <= 0x200D)) ||
3228 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3229 ((c >= 0x2070) && (c <= 0x218F)) ||
3230 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3231 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3232 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3233 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3234 ((c >= 0x10000) && (c <= 0xEFFFF))
3235 )) {
3236 if (count++ > XML_PARSER_CHUNK_SIZE) {
3237 count = 0;
3238 GROW;
3239 if (ctxt->instate == XML_PARSER_EOF)
3240 return(NULL);
3241 }
3242 len += l;
3243 NEXTL(l);
3244 c = CUR_CHAR(l);
3245 }
3246 } else {
3247 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248 (!IS_LETTER(c) && (c != '_') &&
3249 (c != ':'))) {
3250 return(NULL);
3251 }
3252 len += l;
3253 NEXTL(l);
3254 c = CUR_CHAR(l);
3255
3256 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3257 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3258 (c == '.') || (c == '-') ||
3259 (c == '_') || (c == ':') ||
3260 (IS_COMBINING(c)) ||
3261 (IS_EXTENDER(c)))) {
3262 if (count++ > XML_PARSER_CHUNK_SIZE) {
3263 count = 0;
3264 GROW;
3265 if (ctxt->instate == XML_PARSER_EOF)
3266 return(NULL);
3267 }
3268 len += l;
3269 NEXTL(l);
3270 c = CUR_CHAR(l);
3271 }
3272 }
3273 if ((len > XML_MAX_NAME_LENGTH) &&
3274 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3275 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3276 return(NULL);
3277 }
3278 if (ctxt->input->cur - ctxt->input->base < len) {
3279 /*
3280 * There were a couple of bugs where PERefs lead to to a change
3281 * of the buffer. Check the buffer size to avoid passing an invalid
3282 * pointer to xmlDictLookup.
3283 */
3284 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3285 "unexpected change of input buffer");
3286 return (NULL);
3287 }
3288 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3289 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3290 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3291 }
3292
3293 /**
3294 * xmlParseName:
3295 * @ctxt: an XML parser context
3296 *
3297 * parse an XML name.
3298 *
3299 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3300 * CombiningChar | Extender
3301 *
3302 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3303 *
3304 * [6] Names ::= Name (#x20 Name)*
3305 *
3306 * Returns the Name parsed or NULL
3307 */
3308
3309 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3310 xmlParseName(xmlParserCtxtPtr ctxt) {
3311 const xmlChar *in;
3312 const xmlChar *ret;
3313 int count = 0;
3314
3315 GROW;
3316
3317 #ifdef DEBUG
3318 nbParseName++;
3319 #endif
3320
3321 /*
3322 * Accelerator for simple ASCII names
3323 */
3324 in = ctxt->input->cur;
3325 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3326 ((*in >= 0x41) && (*in <= 0x5A)) ||
3327 (*in == '_') || (*in == ':')) {
3328 in++;
3329 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3330 ((*in >= 0x41) && (*in <= 0x5A)) ||
3331 ((*in >= 0x30) && (*in <= 0x39)) ||
3332 (*in == '_') || (*in == '-') ||
3333 (*in == ':') || (*in == '.'))
3334 in++;
3335 if ((*in > 0) && (*in < 0x80)) {
3336 count = in - ctxt->input->cur;
3337 if ((count > XML_MAX_NAME_LENGTH) &&
3338 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3339 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3340 return(NULL);
3341 }
3342 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3343 ctxt->input->cur = in;
3344 ctxt->nbChars += count;
3345 ctxt->input->col += count;
3346 if (ret == NULL)
3347 xmlErrMemory(ctxt, NULL);
3348 return(ret);
3349 }
3350 }
3351 /* accelerator for special cases */
3352 return(xmlParseNameComplex(ctxt));
3353 }
3354
3355 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3356 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3357 int len = 0, l;
3358 int c;
3359 int count = 0;
3360 size_t startPosition = 0;
3361
3362 #ifdef DEBUG
3363 nbParseNCNameComplex++;
3364 #endif
3365
3366 /*
3367 * Handler for more complex cases
3368 */
3369 GROW;
3370 startPosition = CUR_PTR - BASE_PTR;
3371 c = CUR_CHAR(l);
3372 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3373 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3374 return(NULL);
3375 }
3376
3377 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3378 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3379 if (count++ > XML_PARSER_CHUNK_SIZE) {
3380 if ((len > XML_MAX_NAME_LENGTH) &&
3381 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3382 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3383 return(NULL);
3384 }
3385 count = 0;
3386 GROW;
3387 if (ctxt->instate == XML_PARSER_EOF)
3388 return(NULL);
3389 }
3390 len += l;
3391 NEXTL(l);
3392 c = CUR_CHAR(l);
3393 if (c == 0) {
3394 count = 0;
3395 /*
3396 * when shrinking to extend the buffer we really need to preserve
3397 * the part of the name we already parsed. Hence rolling back
3398 * by current lenght.
3399 */
3400 ctxt->input->cur -= l;
3401 GROW;
3402 if (ctxt->instate == XML_PARSER_EOF)
3403 return(NULL);
3404 ctxt->input->cur += l;
3405 c = CUR_CHAR(l);
3406 }
3407 }
3408 if ((len > XML_MAX_NAME_LENGTH) &&
3409 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3410 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3411 return(NULL);
3412 }
3413 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3414 }
3415
3416 /**
3417 * xmlParseNCName:
3418 * @ctxt: an XML parser context
3419 * @len: length of the string parsed
3420 *
3421 * parse an XML name.
3422 *
3423 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3424 * CombiningChar | Extender
3425 *
3426 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3427 *
3428 * Returns the Name parsed or NULL
3429 */
3430
3431 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3432 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3433 const xmlChar *in, *e;
3434 const xmlChar *ret;
3435 int count = 0;
3436
3437 #ifdef DEBUG
3438 nbParseNCName++;
3439 #endif
3440
3441 /*
3442 * Accelerator for simple ASCII names
3443 */
3444 in = ctxt->input->cur;
3445 e = ctxt->input->end;
3446 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3447 ((*in >= 0x41) && (*in <= 0x5A)) ||
3448 (*in == '_')) && (in < e)) {
3449 in++;
3450 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3451 ((*in >= 0x41) && (*in <= 0x5A)) ||
3452 ((*in >= 0x30) && (*in <= 0x39)) ||
3453 (*in == '_') || (*in == '-') ||
3454 (*in == '.')) && (in < e))
3455 in++;
3456 if (in >= e)
3457 goto complex;
3458 if ((*in > 0) && (*in < 0x80)) {
3459 count = in - ctxt->input->cur;
3460 if ((count > XML_MAX_NAME_LENGTH) &&
3461 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3462 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3463 return(NULL);
3464 }
3465 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3466 ctxt->input->cur = in;
3467 ctxt->nbChars += count;
3468 ctxt->input->col += count;
3469 if (ret == NULL) {
3470 xmlErrMemory(ctxt, NULL);
3471 }
3472 return(ret);
3473 }
3474 }
3475 complex:
3476 return(xmlParseNCNameComplex(ctxt));
3477 }
3478
3479 /**
3480 * xmlParseNameAndCompare:
3481 * @ctxt: an XML parser context
3482 *
3483 * parse an XML name and compares for match
3484 * (specialized for endtag parsing)
3485 *
3486 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3487 * and the name for mismatch
3488 */
3489
3490 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3491 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3492 register const xmlChar *cmp = other;
3493 register const xmlChar *in;
3494 const xmlChar *ret;
3495
3496 GROW;
3497 if (ctxt->instate == XML_PARSER_EOF)
3498 return(NULL);
3499
3500 in = ctxt->input->cur;
3501 while (*in != 0 && *in == *cmp) {
3502 ++in;
3503 ++cmp;
3504 ctxt->input->col++;
3505 }
3506 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3507 /* success */
3508 ctxt->input->cur = in;
3509 return (const xmlChar*) 1;
3510 }
3511 /* failure (or end of input buffer), check with full function */
3512 ret = xmlParseName (ctxt);
3513 /* strings coming from the dictionary direct compare possible */
3514 if (ret == other) {
3515 return (const xmlChar*) 1;
3516 }
3517 return ret;
3518 }
3519
3520 /**
3521 * xmlParseStringName:
3522 * @ctxt: an XML parser context
3523 * @str: a pointer to the string pointer (IN/OUT)
3524 *
3525 * parse an XML name.
3526 *
3527 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3528 * CombiningChar | Extender
3529 *
3530 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3531 *
3532 * [6] Names ::= Name (#x20 Name)*
3533 *
3534 * Returns the Name parsed or NULL. The @str pointer
3535 * is updated to the current location in the string.
3536 */
3537
3538 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3539 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3540 xmlChar buf[XML_MAX_NAMELEN + 5];
3541 const xmlChar *cur = *str;
3542 int len = 0, l;
3543 int c;
3544
3545 #ifdef DEBUG
3546 nbParseStringName++;
3547 #endif
3548
3549 c = CUR_SCHAR(cur, l);
3550 if (!xmlIsNameStartChar(ctxt, c)) {
3551 return(NULL);
3552 }
3553
3554 COPY_BUF(l,buf,len,c);
3555 cur += l;
3556 c = CUR_SCHAR(cur, l);
3557 while (xmlIsNameChar(ctxt, c)) {
3558 COPY_BUF(l,buf,len,c);
3559 cur += l;
3560 c = CUR_SCHAR(cur, l);
3561 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3562 /*
3563 * Okay someone managed to make a huge name, so he's ready to pay
3564 * for the processing speed.
3565 */
3566 xmlChar *buffer;
3567 int max = len * 2;
3568
3569 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3570 if (buffer == NULL) {
3571 xmlErrMemory(ctxt, NULL);
3572 return(NULL);
3573 }
3574 memcpy(buffer, buf, len);
3575 while (xmlIsNameChar(ctxt, c)) {
3576 if (len + 10 > max) {
3577 xmlChar *tmp;
3578
3579 if ((len > XML_MAX_NAME_LENGTH) &&
3580 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3581 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3582 xmlFree(buffer);
3583 return(NULL);
3584 }
3585 max *= 2;
3586 tmp = (xmlChar *) xmlRealloc(buffer,
3587 max * sizeof(xmlChar));
3588 if (tmp == NULL) {
3589 xmlErrMemory(ctxt, NULL);
3590 xmlFree(buffer);
3591 return(NULL);
3592 }
3593 buffer = tmp;
3594 }
3595 COPY_BUF(l,buffer,len,c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 }
3599 buffer[len] = 0;
3600 *str = cur;
3601 return(buffer);
3602 }
3603 }
3604 if ((len > XML_MAX_NAME_LENGTH) &&
3605 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3606 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3607 return(NULL);
3608 }
3609 *str = cur;
3610 return(xmlStrndup(buf, len));
3611 }
3612
3613 /**
3614 * xmlParseNmtoken:
3615 * @ctxt: an XML parser context
3616 *
3617 * parse an XML Nmtoken.
3618 *
3619 * [7] Nmtoken ::= (NameChar)+
3620 *
3621 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3622 *
3623 * Returns the Nmtoken parsed or NULL
3624 */
3625
3626 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3627 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3628 xmlChar buf[XML_MAX_NAMELEN + 5];
3629 int len = 0, l;
3630 int c;
3631 int count = 0;
3632
3633 #ifdef DEBUG
3634 nbParseNmToken++;
3635 #endif
3636
3637 GROW;
3638 if (ctxt->instate == XML_PARSER_EOF)
3639 return(NULL);
3640 c = CUR_CHAR(l);
3641
3642 while (xmlIsNameChar(ctxt, c)) {
3643 if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 count = 0;
3645 GROW;
3646 }
3647 COPY_BUF(l,buf,len,c);
3648 NEXTL(l);
3649 c = CUR_CHAR(l);
3650 if (c == 0) {
3651 count = 0;
3652 GROW;
3653 if (ctxt->instate == XML_PARSER_EOF)
3654 return(NULL);
3655 c = CUR_CHAR(l);
3656 }
3657 if (len >= XML_MAX_NAMELEN) {
3658 /*
3659 * Okay someone managed to make a huge token, so he's ready to pay
3660 * for the processing speed.
3661 */
3662 xmlChar *buffer;
3663 int max = len * 2;
3664
3665 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3666 if (buffer == NULL) {
3667 xmlErrMemory(ctxt, NULL);
3668 return(NULL);
3669 }
3670 memcpy(buffer, buf, len);
3671 while (xmlIsNameChar(ctxt, c)) {
3672 if (count++ > XML_PARSER_CHUNK_SIZE) {
3673 count = 0;
3674 GROW;
3675 if (ctxt->instate == XML_PARSER_EOF) {
3676 xmlFree(buffer);
3677 return(NULL);
3678 }
3679 }
3680 if (len + 10 > max) {
3681 xmlChar *tmp;
3682
3683 if ((max > XML_MAX_NAME_LENGTH) &&
3684 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3685 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3686 xmlFree(buffer);
3687 return(NULL);
3688 }
3689 max *= 2;
3690 tmp = (xmlChar *) xmlRealloc(buffer,
3691 max * sizeof(xmlChar));
3692 if (tmp == NULL) {
3693 xmlErrMemory(ctxt, NULL);
3694 xmlFree(buffer);
3695 return(NULL);
3696 }
3697 buffer = tmp;
3698 }
3699 COPY_BUF(l,buffer,len,c);
3700 NEXTL(l);
3701 c = CUR_CHAR(l);
3702 }
3703 buffer[len] = 0;
3704 return(buffer);
3705 }
3706 }
3707 if (len == 0)
3708 return(NULL);
3709 if ((len > XML_MAX_NAME_LENGTH) &&
3710 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3711 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3712 return(NULL);
3713 }
3714 return(xmlStrndup(buf, len));
3715 }
3716
3717 /**
3718 * xmlParseEntityValue:
3719 * @ctxt: an XML parser context
3720 * @orig: if non-NULL store a copy of the original entity value
3721 *
3722 * parse a value for ENTITY declarations
3723 *
3724 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3725 * "'" ([^%&'] | PEReference | Reference)* "'"
3726 *
3727 * Returns the EntityValue parsed with reference substituted or NULL
3728 */
3729
3730 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3731 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3732 xmlChar *buf = NULL;
3733 int len = 0;
3734 int size = XML_PARSER_BUFFER_SIZE;
3735 int c, l;
3736 xmlChar stop;
3737 xmlChar *ret = NULL;
3738 const xmlChar *cur = NULL;
3739 xmlParserInputPtr input;
3740
3741 if (RAW == '"') stop = '"';
3742 else if (RAW == '\'') stop = '\'';
3743 else {
3744 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3745 return(NULL);
3746 }
3747 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3748 if (buf == NULL) {
3749 xmlErrMemory(ctxt, NULL);
3750 return(NULL);
3751 }
3752
3753 /*
3754 * The content of the entity definition is copied in a buffer.
3755 */
3756
3757 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3758 input = ctxt->input;
3759 GROW;
3760 if (ctxt->instate == XML_PARSER_EOF)
3761 goto error;
3762 NEXT;
3763 c = CUR_CHAR(l);
3764 /*
3765 * NOTE: 4.4.5 Included in Literal
3766 * When a parameter entity reference appears in a literal entity
3767 * value, ... a single or double quote character in the replacement
3768 * text is always treated as a normal data character and will not
3769 * terminate the literal.
3770 * In practice it means we stop the loop only when back at parsing
3771 * the initial entity and the quote is found
3772 */
3773 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3774 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3775 if (len + 5 >= size) {
3776 xmlChar *tmp;
3777
3778 size *= 2;
3779 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3780 if (tmp == NULL) {
3781 xmlErrMemory(ctxt, NULL);
3782 goto error;
3783 }
3784 buf = tmp;
3785 }
3786 COPY_BUF(l,buf,len,c);
3787 NEXTL(l);
3788
3789 GROW;
3790 c = CUR_CHAR(l);
3791 if (c == 0) {
3792 GROW;
3793 c = CUR_CHAR(l);
3794 }
3795 }
3796 buf[len] = 0;
3797 if (ctxt->instate == XML_PARSER_EOF)
3798 goto error;
3799 if (c != stop) {
3800 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3801 goto error;
3802 }
3803 NEXT;
3804
3805 /*
3806 * Raise problem w.r.t. '&' and '%' being used in non-entities
3807 * reference constructs. Note Charref will be handled in
3808 * xmlStringDecodeEntities()
3809 */
3810 cur = buf;
3811 while (*cur != 0) { /* non input consuming */
3812 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3813 xmlChar *name;
3814 xmlChar tmp = *cur;
3815 int nameOk = 0;
3816
3817 cur++;
3818 name = xmlParseStringName(ctxt, &cur);
3819 if (name != NULL) {
3820 nameOk = 1;
3821 xmlFree(name);
3822 }
3823 if ((nameOk == 0) || (*cur != ';')) {
3824 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3825 "EntityValue: '%c' forbidden except for entities references\n",
3826 tmp);
3827 goto error;
3828 }
3829 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3830 (ctxt->inputNr == 1)) {
3831 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3832 goto error;
3833 }
3834 if (*cur == 0)
3835 break;
3836 }
3837 cur++;
3838 }
3839
3840 /*
3841 * Then PEReference entities are substituted.
3842 *
3843 * NOTE: 4.4.7 Bypassed
3844 * When a general entity reference appears in the EntityValue in
3845 * an entity declaration, it is bypassed and left as is.
3846 * so XML_SUBSTITUTE_REF is not set here.
3847 */
3848 ++ctxt->depth;
3849 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3850 0, 0, 0);
3851 --ctxt->depth;
3852 if (orig != NULL) {
3853 *orig = buf;
3854 buf = NULL;
3855 }
3856
3857 error:
3858 if (buf != NULL)
3859 xmlFree(buf);
3860 return(ret);
3861 }
3862
3863 /**
3864 * xmlParseAttValueComplex:
3865 * @ctxt: an XML parser context
3866 * @len: the resulting attribute len
3867 * @normalize: wether to apply the inner normalization
3868 *
3869 * parse a value for an attribute, this is the fallback function
3870 * of xmlParseAttValue() when the attribute parsing requires handling
3871 * of non-ASCII characters, or normalization compaction.
3872 *
3873 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3874 */
3875 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3876 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3877 xmlChar limit = 0;
3878 xmlChar *buf = NULL;
3879 xmlChar *rep = NULL;
3880 size_t len = 0;
3881 size_t buf_size = 0;
3882 int c, l, in_space = 0;
3883 xmlChar *current = NULL;
3884 xmlEntityPtr ent;
3885
3886 if (NXT(0) == '"') {
3887 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3888 limit = '"';
3889 NEXT;
3890 } else if (NXT(0) == '\'') {
3891 limit = '\'';
3892 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3893 NEXT;
3894 } else {
3895 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3896 return(NULL);
3897 }
3898
3899 /*
3900 * allocate a translation buffer.
3901 */
3902 buf_size = XML_PARSER_BUFFER_SIZE;
3903 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3904 if (buf == NULL) goto mem_error;
3905
3906 /*
3907 * OK loop until we reach one of the ending char or a size limit.
3908 */
3909 c = CUR_CHAR(l);
3910 while (((NXT(0) != limit) && /* checked */
3911 (IS_CHAR(c)) && (c != '<')) &&
3912 (ctxt->instate != XML_PARSER_EOF)) {
3913 /*
3914 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3915 * special option is given
3916 */
3917 if ((len > XML_MAX_TEXT_LENGTH) &&
3918 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3919 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3920 "AttValue length too long\n");
3921 goto mem_error;
3922 }
3923 if (c == 0) break;
3924 if (c == '&') {
3925 in_space = 0;
3926 if (NXT(1) == '#') {
3927 int val = xmlParseCharRef(ctxt);
3928
3929 if (val == '&') {
3930 if (ctxt->replaceEntities) {
3931 if (len + 10 > buf_size) {
3932 growBuffer(buf, 10);
3933 }
3934 buf[len++] = '&';
3935 } else {
3936 /*
3937 * The reparsing will be done in xmlStringGetNodeList()
3938 * called by the attribute() function in SAX.c
3939 */
3940 if (len + 10 > buf_size) {
3941 growBuffer(buf, 10);
3942 }
3943 buf[len++] = '&';
3944 buf[len++] = '#';
3945 buf[len++] = '3';
3946 buf[len++] = '8';
3947 buf[len++] = ';';
3948 }
3949 } else if (val != 0) {
3950 if (len + 10 > buf_size) {
3951 growBuffer(buf, 10);
3952 }
3953 len += xmlCopyChar(0, &buf[len], val);
3954 }
3955 } else {
3956 ent = xmlParseEntityRef(ctxt);
3957 ctxt->nbentities++;
3958 if (ent != NULL)
3959 ctxt->nbentities += ent->owner;
3960 if ((ent != NULL) &&
3961 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3962 if (len + 10 > buf_size) {
3963 growBuffer(buf, 10);
3964 }
3965 if ((ctxt->replaceEntities == 0) &&
3966 (ent->content[0] == '&')) {
3967 buf[len++] = '&';
3968 buf[len++] = '#';
3969 buf[len++] = '3';
3970 buf[len++] = '8';
3971 buf[len++] = ';';
3972 } else {
3973 buf[len++] = ent->content[0];
3974 }
3975 } else if ((ent != NULL) &&
3976 (ctxt->replaceEntities != 0)) {
3977 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3978 ++ctxt->depth;
3979 rep = xmlStringDecodeEntities(ctxt, ent->content,
3980 XML_SUBSTITUTE_REF,
3981 0, 0, 0);
3982 --ctxt->depth;
3983 if (rep != NULL) {
3984 current = rep;
3985 while (*current != 0) { /* non input consuming */
3986 if ((*current == 0xD) || (*current == 0xA) ||
3987 (*current == 0x9)) {
3988 buf[len++] = 0x20;
3989 current++;
3990 } else
3991 buf[len++] = *current++;
3992 if (len + 10 > buf_size) {
3993 growBuffer(buf, 10);
3994 }
3995 }
3996 xmlFree(rep);
3997 rep = NULL;
3998 }
3999 } else {
4000 if (len + 10 > buf_size) {
4001 growBuffer(buf, 10);
4002 }
4003 if (ent->content != NULL)
4004 buf[len++] = ent->content[0];
4005 }
4006 } else if (ent != NULL) {
4007 int i = xmlStrlen(ent->name);
4008 const xmlChar *cur = ent->name;
4009
4010 /*
4011 * This may look absurd but is needed to detect
4012 * entities problems
4013 */
4014 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4015 (ent->content != NULL) && (ent->checked == 0)) {
4016 unsigned long oldnbent = ctxt->nbentities;
4017
4018 ++ctxt->depth;
4019 rep = xmlStringDecodeEntities(ctxt, ent->content,
4020 XML_SUBSTITUTE_REF, 0, 0, 0);
4021 --ctxt->depth;
4022
4023 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4024 if (rep != NULL) {
4025 if (xmlStrchr(rep, '<'))
4026 ent->checked |= 1;
4027 xmlFree(rep);
4028 rep = NULL;
4029 } else {
4030 ent->content[0] = 0;
4031 }
4032 }
4033
4034 /*
4035 * Just output the reference
4036 */
4037 buf[len++] = '&';
4038 while (len + i + 10 > buf_size) {
4039 growBuffer(buf, i + 10);
4040 }
4041 for (;i > 0;i--)
4042 buf[len++] = *cur++;
4043 buf[len++] = ';';
4044 }
4045 }
4046 } else {
4047 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4048 if ((len != 0) || (!normalize)) {
4049 if ((!normalize) || (!in_space)) {
4050 COPY_BUF(l,buf,len,0x20);
4051 while (len + 10 > buf_size) {
4052 growBuffer(buf, 10);
4053 }
4054 }
4055 in_space = 1;
4056 }
4057 } else {
4058 in_space = 0;
4059 COPY_BUF(l,buf,len,c);
4060 if (len + 10 > buf_size) {
4061 growBuffer(buf, 10);
4062 }
4063 }
4064 NEXTL(l);
4065 }
4066 GROW;
4067 c = CUR_CHAR(l);
4068 }
4069 if (ctxt->instate == XML_PARSER_EOF)
4070 goto error;
4071
4072 if ((in_space) && (normalize)) {
4073 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4074 }
4075 buf[len] = 0;
4076 if (RAW == '<') {
4077 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4078 } else if (RAW != limit) {
4079 if ((c != 0) && (!IS_CHAR(c))) {
4080 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4081 "invalid character in attribute value\n");
4082 } else {
4083 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4084 "AttValue: ' expected\n");
4085 }
4086 } else
4087 NEXT;
4088
4089 /*
4090 * There we potentially risk an overflow, don't allow attribute value of
4091 * length more than INT_MAX it is a very reasonnable assumption !
4092 */
4093 if (len >= INT_MAX) {
4094 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4095 "AttValue length too long\n");
4096 goto mem_error;
4097 }
4098
4099 if (attlen != NULL) *attlen = (int) len;
4100 return(buf);
4101
4102 mem_error:
4103 xmlErrMemory(ctxt, NULL);
4104 error:
4105 if (buf != NULL)
4106 xmlFree(buf);
4107 if (rep != NULL)
4108 xmlFree(rep);
4109 return(NULL);
4110 }
4111
4112 /**
4113 * xmlParseAttValue:
4114 * @ctxt: an XML parser context
4115 *
4116 * parse a value for an attribute
4117 * Note: the parser won't do substitution of entities here, this
4118 * will be handled later in xmlStringGetNodeList
4119 *
4120 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4121 * "'" ([^<&'] | Reference)* "'"
4122 *
4123 * 3.3.3 Attribute-Value Normalization:
4124 * Before the value of an attribute is passed to the application or
4125 * checked for validity, the XML processor must normalize it as follows:
4126 * - a character reference is processed by appending the referenced
4127 * character to the attribute value
4128 * - an entity reference is processed by recursively processing the
4129 * replacement text of the entity
4130 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4131 * appending #x20 to the normalized value, except that only a single
4132 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4133 * parsed entity or the literal entity value of an internal parsed entity
4134 * - other characters are processed by appending them to the normalized value
4135 * If the declared value is not CDATA, then the XML processor must further
4136 * process the normalized attribute value by discarding any leading and
4137 * trailing space (#x20) characters, and by replacing sequences of space
4138 * (#x20) characters by a single space (#x20) character.
4139 * All attributes for which no declaration has been read should be treated
4140 * by a non-validating parser as if declared CDATA.
4141 *
4142 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4143 */
4144
4145
4146 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4147 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4148 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4149 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4150 }
4151
4152 /**
4153 * xmlParseSystemLiteral:
4154 * @ctxt: an XML parser context
4155 *
4156 * parse an XML Literal
4157 *
4158 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4159 *
4160 * Returns the SystemLiteral parsed or NULL
4161 */
4162
4163 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4164 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4165 xmlChar *buf = NULL;
4166 int len = 0;
4167 int size = XML_PARSER_BUFFER_SIZE;
4168 int cur, l;
4169 xmlChar stop;
4170 int state = ctxt->instate;
4171 int count = 0;
4172
4173 SHRINK;
4174 if (RAW == '"') {
4175 NEXT;
4176 stop = '"';
4177 } else if (RAW == '\'') {
4178 NEXT;
4179 stop = '\'';
4180 } else {
4181 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4182 return(NULL);
4183 }
4184
4185 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4186 if (buf == NULL) {
4187 xmlErrMemory(ctxt, NULL);
4188 return(NULL);
4189 }
4190 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4191 cur = CUR_CHAR(l);
4192 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4193 if (len + 5 >= size) {
4194 xmlChar *tmp;
4195
4196 if ((size > XML_MAX_NAME_LENGTH) &&
4197 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4198 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4199 xmlFree(buf);
4200 ctxt->instate = (xmlParserInputState) state;
4201 return(NULL);
4202 }
4203 size *= 2;
4204 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4205 if (tmp == NULL) {
4206 xmlFree(buf);
4207 xmlErrMemory(ctxt, NULL);
4208 ctxt->instate = (xmlParserInputState) state;
4209 return(NULL);
4210 }
4211 buf = tmp;
4212 }
4213 count++;
4214 if (count > 50) {
4215 GROW;
4216 count = 0;
4217 if (ctxt->instate == XML_PARSER_EOF) {
4218 xmlFree(buf);
4219 return(NULL);
4220 }
4221 }
4222 COPY_BUF(l,buf,len,cur);
4223 NEXTL(l);
4224 cur = CUR_CHAR(l);
4225 if (cur == 0) {
4226 GROW;
4227 SHRINK;
4228 cur = CUR_CHAR(l);
4229 }
4230 }
4231 buf[len] = 0;
4232 ctxt->instate = (xmlParserInputState) state;
4233 if (!IS_CHAR(cur)) {
4234 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4235 } else {
4236 NEXT;
4237 }
4238 return(buf);
4239 }
4240
4241 /**
4242 * xmlParsePubidLiteral:
4243 * @ctxt: an XML parser context
4244 *
4245 * parse an XML public literal
4246 *
4247 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4248 *
4249 * Returns the PubidLiteral parsed or NULL.
4250 */
4251
4252 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4253 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4254 xmlChar *buf = NULL;
4255 int len = 0;
4256 int size = XML_PARSER_BUFFER_SIZE;
4257 xmlChar cur;
4258 xmlChar stop;
4259 int count = 0;
4260 xmlParserInputState oldstate = ctxt->instate;
4261
4262 SHRINK;
4263 if (RAW == '"') {
4264 NEXT;
4265 stop = '"';
4266 } else if (RAW == '\'') {
4267 NEXT;
4268 stop = '\'';
4269 } else {
4270 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4271 return(NULL);
4272 }
4273 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4274 if (buf == NULL) {
4275 xmlErrMemory(ctxt, NULL);
4276 return(NULL);
4277 }
4278 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4279 cur = CUR;
4280 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4281 if (len + 1 >= size) {
4282 xmlChar *tmp;
4283
4284 if ((size > XML_MAX_NAME_LENGTH) &&
4285 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4286 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4287 xmlFree(buf);
4288 return(NULL);
4289 }
4290 size *= 2;
4291 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4292 if (tmp == NULL) {
4293 xmlErrMemory(ctxt, NULL);
4294 xmlFree(buf);
4295 return(NULL);
4296 }
4297 buf = tmp;
4298 }
4299 buf[len++] = cur;
4300 count++;
4301 if (count > 50) {
4302 GROW;
4303 count = 0;
4304 if (ctxt->instate == XML_PARSER_EOF) {
4305 xmlFree(buf);
4306 return(NULL);
4307 }
4308 }
4309 NEXT;
4310 cur = CUR;
4311 if (cur == 0) {
4312 GROW;
4313 SHRINK;
4314 cur = CUR;
4315 }
4316 }
4317 buf[len] = 0;
4318 if (cur != stop) {
4319 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320 } else {
4321 NEXT;
4322 }
4323 ctxt->instate = oldstate;
4324 return(buf);
4325 }
4326
4327 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4328
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Indexed by byte value: an entry holds the byte itself when
 * the byte is 0x09 or an ASCII character from 0x20 to 0x7F other than
 * '&', '<' and ']', and 0x00 for every other byte (remaining control
 * characters including CR/LF, the three excluded characters, and all
 * non-ASCII bytes).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4366
4367 /**
4368 * xmlParseCharData:
4369 * @ctxt: an XML parser context
4370 * @cdata: int indicating whether we are within a CDATA section
4371 *
4372 * parse a CharData section.
4373 * if we are within a CDATA section ']]>' marks an end of section.
4374 *
4375 * The right angle bracket (>) may be represented using the string ">",
4376 * and must, for compatibility, be escaped using ">" or a character
4377 * reference when it appears in the string "]]>" in content, when that
4378 * string is not marking the end of a CDATA section.
4379 *
4380 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4381 */
4382
4383 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4384 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4385 const xmlChar *in;
4386 int nbchar = 0;
4387 int line = ctxt->input->line;
4388 int col = ctxt->input->col;
4389 int ccol;
4390
4391 SHRINK;
4392 GROW;
4393 /*
4394 * Accelerated common case where input don't need to be
4395 * modified before passing it to the handler.
4396 */
4397 if (!cdata) {
4398 in = ctxt->input->cur;
4399 do {
4400 get_more_space:
4401 while (*in == 0x20) { in++; ctxt->input->col++; }
4402 if (*in == 0xA) {
4403 do {
4404 ctxt->input->line++; ctxt->input->col = 1;
4405 in++;
4406 } while (*in == 0xA);
4407 goto get_more_space;
4408 }
4409 if (*in == '<') {
4410 nbchar = in - ctxt->input->cur;
4411 if (nbchar > 0) {
4412 const xmlChar *tmp = ctxt->input->cur;
4413 ctxt->input->cur = in;
4414
4415 if ((ctxt->sax != NULL) &&
4416 (ctxt->sax->ignorableWhitespace !=
4417 ctxt->sax->characters)) {
4418 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4419 if (ctxt->sax->ignorableWhitespace != NULL)
4420 ctxt->sax->ignorableWhitespace(ctxt->userData,
4421 tmp, nbchar);
4422 } else {
4423 if (ctxt->sax->characters != NULL)
4424 ctxt->sax->characters(ctxt->userData,
4425 tmp, nbchar);
4426 if (*ctxt->space == -1)
4427 *ctxt->space = -2;
4428 }
4429 } else if ((ctxt->sax != NULL) &&
4430 (ctxt->sax->characters != NULL)) {
4431 ctxt->sax->characters(ctxt->userData,
4432 tmp, nbchar);
4433 }
4434 }
4435 return;
4436 }
4437
4438 get_more:
4439 ccol = ctxt->input->col;
4440 while (test_char_data[*in]) {
4441 in++;
4442 ccol++;
4443 }
4444 ctxt->input->col = ccol;
4445 if (*in == 0xA) {
4446 do {
4447 ctxt->input->line++; ctxt->input->col = 1;
4448 in++;
4449 } while (*in == 0xA);
4450 goto get_more;
4451 }
4452 if (*in == ']') {
4453 if ((in[1] == ']') && (in[2] == '>')) {
4454 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4455 ctxt->input->cur = in + 1;
4456 return;
4457 }
4458 in++;
4459 ctxt->input->col++;
4460 goto get_more;
4461 }
4462 nbchar = in - ctxt->input->cur;
4463 if (nbchar > 0) {
4464 if ((ctxt->sax != NULL) &&
4465 (ctxt->sax->ignorableWhitespace !=
4466 ctxt->sax->characters) &&
4467 (IS_BLANK_CH(*ctxt->input->cur))) {
4468 const xmlChar *tmp = ctxt->input->cur;
4469 ctxt->input->cur = in;
4470
4471 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4472 if (ctxt->sax->ignorableWhitespace != NULL)
4473 ctxt->sax->ignorableWhitespace(ctxt->userData,
4474 tmp, nbchar);
4475 } else {
4476 if (ctxt->sax->characters != NULL)
4477 ctxt->sax->characters(ctxt->userData,
4478 tmp, nbchar);
4479 if (*ctxt->space == -1)
4480 *ctxt->space = -2;
4481 }
4482 line = ctxt->input->line;
4483 col = ctxt->input->col;
4484 } else if (ctxt->sax != NULL) {
4485 if (ctxt->sax->characters != NULL)
4486 ctxt->sax->characters(ctxt->userData,
4487 ctxt->input->cur, nbchar);
4488 line = ctxt->input->line;
4489 col = ctxt->input->col;
4490 }
4491 /* something really bad happened in the SAX callback */
4492 if (ctxt->instate != XML_PARSER_CONTENT)
4493 return;
4494 }
4495 ctxt->input->cur = in;
4496 if (*in == 0xD) {
4497 in++;
4498 if (*in == 0xA) {
4499 ctxt->input->cur = in;
4500 in++;
4501 ctxt->input->line++; ctxt->input->col = 1;
4502 continue; /* while */
4503 }
4504 in--;
4505 }
4506 if (*in == '<') {
4507 return;
4508 }
4509 if (*in == '&') {
4510 return;
4511 }
4512 SHRINK;
4513 GROW;
4514 if (ctxt->instate == XML_PARSER_EOF)
4515 return;
4516 in = ctxt->input->cur;
4517 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4518 nbchar = 0;
4519 }
4520 ctxt->input->line = line;
4521 ctxt->input->col = col;
4522 xmlParseCharDataComplex(ctxt, cdata);
4523 }
4524
4525 /**
4526 * xmlParseCharDataComplex:
4527 * @ctxt: an XML parser context
4528 * @cdata: int indicating whether we are within a CDATA section
4529 *
4530 * parse a CharData section.this is the fallback function
4531 * of xmlParseCharData() when the parsing requires handling
4532 * of non-ASCII characters.
4533 */
4534 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4535 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4536 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4537 int nbchar = 0;
4538 int cur, l;
4539 int count = 0;
4540
4541 SHRINK;
4542 GROW;
4543 cur = CUR_CHAR(l);
4544 while ((cur != '<') && /* checked */
4545 (cur != '&') &&
4546 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4547 if ((cur == ']') && (NXT(1) == ']') &&
4548 (NXT(2) == '>')) {
4549 if (cdata) break;
4550 else {
4551 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4552 }
4553 }
4554 COPY_BUF(l,buf,nbchar,cur);
4555 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4556 buf[nbchar] = 0;
4557
4558 /*
4559 * OK the segment is to be consumed as chars.
4560 */
4561 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4562 if (areBlanks(ctxt, buf, nbchar, 0)) {
4563 if (ctxt->sax->ignorableWhitespace != NULL)
4564 ctxt->sax->ignorableWhitespace(ctxt->userData,
4565 buf, nbchar);
4566 } else {
4567 if (ctxt->sax->characters != NULL)
4568 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4569 if ((ctxt->sax->characters !=
4570 ctxt->sax->ignorableWhitespace) &&
4571 (*ctxt->space == -1))
4572 *ctxt->space = -2;
4573 }
4574 }
4575 nbchar = 0;
4576 /* something really bad happened in the SAX callback */
4577 if (ctxt->instate != XML_PARSER_CONTENT)
4578 return;
4579 }
4580 count++;
4581 if (count > 50) {
4582 GROW;
4583 count = 0;
4584 if (ctxt->instate == XML_PARSER_EOF)
4585 return;
4586 }
4587 NEXTL(l);
4588 cur = CUR_CHAR(l);
4589 }
4590 if (nbchar != 0) {
4591 buf[nbchar] = 0;
4592 /*
4593 * OK the segment is to be consumed as chars.
4594 */
4595 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4596 if (areBlanks(ctxt, buf, nbchar, 0)) {
4597 if (ctxt->sax->ignorableWhitespace != NULL)
4598 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4599 } else {
4600 if (ctxt->sax->characters != NULL)
4601 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4602 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4603 (*ctxt->space == -1))
4604 *ctxt->space = -2;
4605 }
4606 }
4607 }
4608 if ((cur != 0) && (!IS_CHAR(cur))) {
4609 /* Generate the error and skip the offending character */
4610 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4611 "PCDATA invalid Char value %d\n",
4612 cur);
4613 NEXTL(l);
4614 }
4615 }
4616
4617 /**
4618 * xmlParseExternalID:
4619 * @ctxt: an XML parser context
4620 * @publicID: a xmlChar** receiving PubidLiteral
4621 * @strict: indicate whether we should restrict parsing to only
4622 * production [75], see NOTE below
4623 *
4624 * Parse an External ID or a Public ID
4625 *
4626 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4627 * 'PUBLIC' S PubidLiteral S SystemLiteral
4628 *
4629 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4630 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4631 *
4632 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4633 *
4634 * Returns the function returns SystemLiteral and in the second
4635 * case publicID receives PubidLiteral, is strict is off
4636 * it is possible to return NULL and have publicID set.
4637 */
4638
4639 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4640 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4641 xmlChar *URI = NULL;
4642
4643 SHRINK;
4644
4645 *publicID = NULL;
4646 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4647 SKIP(6);
4648 if (SKIP_BLANKS == 0) {
4649 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4650 "Space required after 'SYSTEM'\n");
4651 }
4652 URI = xmlParseSystemLiteral(ctxt);
4653 if (URI == NULL) {
4654 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4655 }
4656 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4657 SKIP(6);
4658 if (SKIP_BLANKS == 0) {
4659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4660 "Space required after 'PUBLIC'\n");
4661 }
4662 *publicID = xmlParsePubidLiteral(ctxt);
4663 if (*publicID == NULL) {
4664 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4665 }
4666 if (strict) {
4667 /*
4668 * We don't handle [83] so "S SystemLiteral" is required.
4669 */
4670 if (SKIP_BLANKS == 0) {
4671 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4672 "Space required after the Public Identifier\n");
4673 }
4674 } else {
4675 /*
4676 * We handle [83] so we return immediately, if
4677 * "S SystemLiteral" is not detected. We skip blanks if no
4678 * system literal was found, but this is harmless since we must
4679 * be at the end of a NotationDecl.
4680 */
4681 if (SKIP_BLANKS == 0) return(NULL);
4682 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4683 }
4684 URI = xmlParseSystemLiteral(ctxt);
4685 if (URI == NULL) {
4686 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4687 }
4688 }
4689 return(URI);
4690 }
4691
4692 /**
4693 * xmlParseCommentComplex:
4694 * @ctxt: an XML parser context
4695 * @buf: the already parsed part of the buffer
4696 * @len: number of bytes filles in the buffer
4697 * @size: allocated size of the buffer
4698 *
4699 * Skip an XML (SGML) comment <!-- .... -->
4700 * The spec says that "For compatibility, the string "--" (double-hyphen)
4701 * must not occur within comments. "
4702 * This is the slow routine in case the accelerator for ascii didn't work
4703 *
4704 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4705 */
4706 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4707 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4708 size_t len, size_t size) {
4709 int q, ql;
4710 int r, rl;
4711 int cur, l;
4712 size_t count = 0;
4713 int inputid;
4714
4715 inputid = ctxt->input->id;
4716
4717 if (buf == NULL) {
4718 len = 0;
4719 size = XML_PARSER_BUFFER_SIZE;
4720 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4721 if (buf == NULL) {
4722 xmlErrMemory(ctxt, NULL);
4723 return;
4724 }
4725 }
4726 GROW; /* Assure there's enough input data */
4727 q = CUR_CHAR(ql);
4728 if (q == 0)
4729 goto not_terminated;
4730 if (!IS_CHAR(q)) {
4731 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4732 "xmlParseComment: invalid xmlChar value %d\n",
4733 q);
4734 xmlFree (buf);
4735 return;
4736 }
4737 NEXTL(ql);
4738 r = CUR_CHAR(rl);
4739 if (r == 0)
4740 goto not_terminated;
4741 if (!IS_CHAR(r)) {
4742 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4743 "xmlParseComment: invalid xmlChar value %d\n",
4744 q);
4745 xmlFree (buf);
4746 return;
4747 }
4748 NEXTL(rl);
4749 cur = CUR_CHAR(l);
4750 if (cur == 0)
4751 goto not_terminated;
4752 while (IS_CHAR(cur) && /* checked */
4753 ((cur != '>') ||
4754 (r != '-') || (q != '-'))) {
4755 if ((r == '-') && (q == '-')) {
4756 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4757 }
4758 if ((len > XML_MAX_TEXT_LENGTH) &&
4759 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4760 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4761 "Comment too big found", NULL);
4762 xmlFree (buf);
4763 return;
4764 }
4765 if (len + 5 >= size) {
4766 xmlChar *new_buf;
4767 size_t new_size;
4768
4769 new_size = size * 2;
4770 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4771 if (new_buf == NULL) {
4772 xmlFree (buf);
4773 xmlErrMemory(ctxt, NULL);
4774 return;
4775 }
4776 buf = new_buf;
4777 size = new_size;
4778 }
4779 COPY_BUF(ql,buf,len,q);
4780 q = r;
4781 ql = rl;
4782 r = cur;
4783 rl = l;
4784
4785 count++;
4786 if (count > 50) {
4787 GROW;
4788 count = 0;
4789 if (ctxt->instate == XML_PARSER_EOF) {
4790 xmlFree(buf);
4791 return;
4792 }
4793 }
4794 NEXTL(l);
4795 cur = CUR_CHAR(l);
4796 if (cur == 0) {
4797 SHRINK;
4798 GROW;
4799 cur = CUR_CHAR(l);
4800 }
4801 }
4802 buf[len] = 0;
4803 if (cur == 0) {
4804 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4805 "Comment not terminated \n<!--%.50s\n", buf);
4806 } else if (!IS_CHAR(cur)) {
4807 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4808 "xmlParseComment: invalid xmlChar value %d\n",
4809 cur);
4810 } else {
4811 if (inputid != ctxt->input->id) {
4812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4813 "Comment doesn't start and stop in the same"
4814 " entity\n");
4815 }
4816 NEXT;
4817 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4818 (!ctxt->disableSAX))
4819 ctxt->sax->comment(ctxt->userData, buf);
4820 }
4821 xmlFree(buf);
4822 return;
4823 not_terminated:
4824 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4825 "Comment not terminated\n", NULL);
4826 xmlFree(buf);
4827 return;
4828 }
4829
4830 /**
4831 * xmlParseComment:
4832 * @ctxt: an XML parser context
4833 *
4834 * Skip an XML (SGML) comment <!-- .... -->
4835 * The spec says that "For compatibility, the string "--" (double-hyphen)
4836 * must not occur within comments. "
4837 *
4838 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4839 */
4840 void
xmlParseComment(xmlParserCtxtPtr ctxt)4841 xmlParseComment(xmlParserCtxtPtr ctxt) {
4842 xmlChar *buf = NULL;
4843 size_t size = XML_PARSER_BUFFER_SIZE;
4844 size_t len = 0;
4845 xmlParserInputState state;
4846 const xmlChar *in;
4847 size_t nbchar = 0;
4848 int ccol;
4849 int inputid;
4850
4851 /*
4852 * Check that there is a comment right here.
4853 */
4854 if ((RAW != '<') || (NXT(1) != '!') ||
4855 (NXT(2) != '-') || (NXT(3) != '-')) return;
4856 state = ctxt->instate;
4857 ctxt->instate = XML_PARSER_COMMENT;
4858 inputid = ctxt->input->id;
4859 SKIP(4);
4860 SHRINK;
4861 GROW;
4862
4863 /*
4864 * Accelerated common case where input don't need to be
4865 * modified before passing it to the handler.
4866 */
4867 in = ctxt->input->cur;
4868 do {
4869 if (*in == 0xA) {
4870 do {
4871 ctxt->input->line++; ctxt->input->col = 1;
4872 in++;
4873 } while (*in == 0xA);
4874 }
4875 get_more:
4876 ccol = ctxt->input->col;
4877 while (((*in > '-') && (*in <= 0x7F)) ||
4878 ((*in >= 0x20) && (*in < '-')) ||
4879 (*in == 0x09)) {
4880 in++;
4881 ccol++;
4882 }
4883 ctxt->input->col = ccol;
4884 if (*in == 0xA) {
4885 do {
4886 ctxt->input->line++; ctxt->input->col = 1;
4887 in++;
4888 } while (*in == 0xA);
4889 goto get_more;
4890 }
4891 nbchar = in - ctxt->input->cur;
4892 /*
4893 * save current set of data
4894 */
4895 if (nbchar > 0) {
4896 if ((ctxt->sax != NULL) &&
4897 (ctxt->sax->comment != NULL)) {
4898 if (buf == NULL) {
4899 if ((*in == '-') && (in[1] == '-'))
4900 size = nbchar + 1;
4901 else
4902 size = XML_PARSER_BUFFER_SIZE + nbchar;
4903 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4904 if (buf == NULL) {
4905 xmlErrMemory(ctxt, NULL);
4906 ctxt->instate = state;
4907 return;
4908 }
4909 len = 0;
4910 } else if (len + nbchar + 1 >= size) {
4911 xmlChar *new_buf;
4912 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4913 new_buf = (xmlChar *) xmlRealloc(buf,
4914 size * sizeof(xmlChar));
4915 if (new_buf == NULL) {
4916 xmlFree (buf);
4917 xmlErrMemory(ctxt, NULL);
4918 ctxt->instate = state;
4919 return;
4920 }
4921 buf = new_buf;
4922 }
4923 memcpy(&buf[len], ctxt->input->cur, nbchar);
4924 len += nbchar;
4925 buf[len] = 0;
4926 }
4927 }
4928 if ((len > XML_MAX_TEXT_LENGTH) &&
4929 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4930 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4931 "Comment too big found", NULL);
4932 xmlFree (buf);
4933 return;
4934 }
4935 ctxt->input->cur = in;
4936 if (*in == 0xA) {
4937 in++;
4938 ctxt->input->line++; ctxt->input->col = 1;
4939 }
4940 if (*in == 0xD) {
4941 in++;
4942 if (*in == 0xA) {
4943 ctxt->input->cur = in;
4944 in++;
4945 ctxt->input->line++; ctxt->input->col = 1;
4946 continue; /* while */
4947 }
4948 in--;
4949 }
4950 SHRINK;
4951 GROW;
4952 if (ctxt->instate == XML_PARSER_EOF) {
4953 xmlFree(buf);
4954 return;
4955 }
4956 in = ctxt->input->cur;
4957 if (*in == '-') {
4958 if (in[1] == '-') {
4959 if (in[2] == '>') {
4960 if (ctxt->input->id != inputid) {
4961 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4962 "comment doesn't start and stop in the"
4963 " same entity\n");
4964 }
4965 SKIP(3);
4966 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4967 (!ctxt->disableSAX)) {
4968 if (buf != NULL)
4969 ctxt->sax->comment(ctxt->userData, buf);
4970 else
4971 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4972 }
4973 if (buf != NULL)
4974 xmlFree(buf);
4975 if (ctxt->instate != XML_PARSER_EOF)
4976 ctxt->instate = state;
4977 return;
4978 }
4979 if (buf != NULL) {
4980 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4981 "Double hyphen within comment: "
4982 "<!--%.50s\n",
4983 buf);
4984 } else
4985 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4986 "Double hyphen within comment\n", NULL);
4987 in++;
4988 ctxt->input->col++;
4989 }
4990 in++;
4991 ctxt->input->col++;
4992 goto get_more;
4993 }
4994 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4995 xmlParseCommentComplex(ctxt, buf, len, size);
4996 ctxt->instate = state;
4997 return;
4998 }
4999
5000
5001 /**
5002 * xmlParsePITarget:
5003 * @ctxt: an XML parser context
5004 *
5005 * parse the name of a PI
5006 *
5007 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5008 *
5009 * Returns the PITarget name or NULL
5010 */
5011
5012 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5013 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5014 const xmlChar *name;
5015
5016 name = xmlParseName(ctxt);
5017 if ((name != NULL) &&
5018 ((name[0] == 'x') || (name[0] == 'X')) &&
5019 ((name[1] == 'm') || (name[1] == 'M')) &&
5020 ((name[2] == 'l') || (name[2] == 'L'))) {
5021 int i;
5022 if ((name[0] == 'x') && (name[1] == 'm') &&
5023 (name[2] == 'l') && (name[3] == 0)) {
5024 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5025 "XML declaration allowed only at the start of the document\n");
5026 return(name);
5027 } else if (name[3] == 0) {
5028 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5029 return(name);
5030 }
5031 for (i = 0;;i++) {
5032 if (xmlW3CPIs[i] == NULL) break;
5033 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5034 return(name);
5035 }
5036 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5037 "xmlParsePITarget: invalid name prefix 'xml'\n",
5038 NULL, NULL);
5039 }
5040 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5041 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5042 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5043 }
5044 return(name);
5045 }
5046
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must read: catalog, '=', a quoted URL, with optional blanks
 * around each part and nothing after the closing quote.  A missing '='
 * makes the PI be ignored silently; every other deviation emits the
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5108
5109 /**
5110 * xmlParsePI:
5111 * @ctxt: an XML parser context
5112 *
5113 * parse an XML Processing Instruction.
5114 *
5115 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5116 *
5117 * The processing is transfered to SAX once parsed.
5118 */
5119
5120 void
xmlParsePI(xmlParserCtxtPtr ctxt)5121 xmlParsePI(xmlParserCtxtPtr ctxt) {
5122 xmlChar *buf = NULL;
5123 size_t len = 0;
5124 size_t size = XML_PARSER_BUFFER_SIZE;
5125 int cur, l;
5126 const xmlChar *target;
5127 xmlParserInputState state;
5128 int count = 0;
5129
5130 if ((RAW == '<') && (NXT(1) == '?')) {
5131 int inputid = ctxt->input->id;
5132 state = ctxt->instate;
5133 ctxt->instate = XML_PARSER_PI;
5134 /*
5135 * this is a Processing Instruction.
5136 */
5137 SKIP(2);
5138 SHRINK;
5139
5140 /*
5141 * Parse the target name and check for special support like
5142 * namespace.
5143 */
5144 target = xmlParsePITarget(ctxt);
5145 if (target != NULL) {
5146 if ((RAW == '?') && (NXT(1) == '>')) {
5147 if (inputid != ctxt->input->id) {
5148 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5149 "PI declaration doesn't start and stop in"
5150 " the same entity\n");
5151 }
5152 SKIP(2);
5153
5154 /*
5155 * SAX: PI detected.
5156 */
5157 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5158 (ctxt->sax->processingInstruction != NULL))
5159 ctxt->sax->processingInstruction(ctxt->userData,
5160 target, NULL);
5161 if (ctxt->instate != XML_PARSER_EOF)
5162 ctxt->instate = state;
5163 return;
5164 }
5165 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5166 if (buf == NULL) {
5167 xmlErrMemory(ctxt, NULL);
5168 ctxt->instate = state;
5169 return;
5170 }
5171 if (SKIP_BLANKS == 0) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5173 "ParsePI: PI %s space expected\n", target);
5174 }
5175 cur = CUR_CHAR(l);
5176 while (IS_CHAR(cur) && /* checked */
5177 ((cur != '?') || (NXT(1) != '>'))) {
5178 if (len + 5 >= size) {
5179 xmlChar *tmp;
5180 size_t new_size = size * 2;
5181 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5182 if (tmp == NULL) {
5183 xmlErrMemory(ctxt, NULL);
5184 xmlFree(buf);
5185 ctxt->instate = state;
5186 return;
5187 }
5188 buf = tmp;
5189 size = new_size;
5190 }
5191 count++;
5192 if (count > 50) {
5193 GROW;
5194 if (ctxt->instate == XML_PARSER_EOF) {
5195 xmlFree(buf);
5196 return;
5197 }
5198 count = 0;
5199 if ((len > XML_MAX_TEXT_LENGTH) &&
5200 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5201 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5202 "PI %s too big found", target);
5203 xmlFree(buf);
5204 ctxt->instate = state;
5205 return;
5206 }
5207 }
5208 COPY_BUF(l,buf,len,cur);
5209 NEXTL(l);
5210 cur = CUR_CHAR(l);
5211 if (cur == 0) {
5212 SHRINK;
5213 GROW;
5214 cur = CUR_CHAR(l);
5215 }
5216 }
5217 if ((len > XML_MAX_TEXT_LENGTH) &&
5218 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5219 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5220 "PI %s too big found", target);
5221 xmlFree(buf);
5222 ctxt->instate = state;
5223 return;
5224 }
5225 buf[len] = 0;
5226 if (cur != '?') {
5227 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5228 "ParsePI: PI %s never end ...\n", target);
5229 } else {
5230 if (inputid != ctxt->input->id) {
5231 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5232 "PI declaration doesn't start and stop in"
5233 " the same entity\n");
5234 }
5235 SKIP(2);
5236
5237 #ifdef LIBXML_CATALOG_ENABLED
5238 if (((state == XML_PARSER_MISC) ||
5239 (state == XML_PARSER_START)) &&
5240 (xmlStrEqual(target, XML_CATALOG_PI))) {
5241 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5242 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5243 (allow == XML_CATA_ALLOW_ALL))
5244 xmlParseCatalogPI(ctxt, buf);
5245 }
5246 #endif
5247
5248
5249 /*
5250 * SAX: PI detected.
5251 */
5252 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5253 (ctxt->sax->processingInstruction != NULL))
5254 ctxt->sax->processingInstruction(ctxt->userData,
5255 target, buf);
5256 }
5257 xmlFree(buf);
5258 } else {
5259 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5260 }
5261 if (ctxt->instate != XML_PARSER_EOF)
5262 ctxt->instate = state;
5263 }
5264 }
5265
5266 /**
5267 * xmlParseNotationDecl:
5268 * @ctxt: an XML parser context
5269 *
5270 * parse a notation declaration
5271 *
5272 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5273 *
5274 * Hence there is actually 3 choices:
5275 * 'PUBLIC' S PubidLiteral
5276 * 'PUBLIC' S PubidLiteral S SystemLiteral
5277 * and 'SYSTEM' S SystemLiteral
5278 *
5279 * See the NOTE on xmlParseExternalID().
5280 */
5281
5282 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5283 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5284 const xmlChar *name;
5285 xmlChar *Pubid;
5286 xmlChar *Systemid;
5287
5288 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5289 int inputid = ctxt->input->id;
5290 SHRINK;
5291 SKIP(10);
5292 if (SKIP_BLANKS == 0) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 "Space required after '<!NOTATION'\n");
5295 return;
5296 }
5297
5298 name = xmlParseName(ctxt);
5299 if (name == NULL) {
5300 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5301 return;
5302 }
5303 if (xmlStrchr(name, ':') != NULL) {
5304 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5305 "colons are forbidden from notation names '%s'\n",
5306 name, NULL, NULL);
5307 }
5308 if (SKIP_BLANKS == 0) {
5309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5310 "Space required after the NOTATION name'\n");
5311 return;
5312 }
5313
5314 /*
5315 * Parse the IDs.
5316 */
5317 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5318 SKIP_BLANKS;
5319
5320 if (RAW == '>') {
5321 if (inputid != ctxt->input->id) {
5322 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5323 "Notation declaration doesn't start and stop"
5324 " in the same entity\n");
5325 }
5326 NEXT;
5327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5328 (ctxt->sax->notationDecl != NULL))
5329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5330 } else {
5331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5332 }
5333 if (Systemid != NULL) xmlFree(Systemid);
5334 if (Pubid != NULL) xmlFree(Pubid);
5335 }
5336 }
5337
5338 /**
5339 * xmlParseEntityDecl:
5340 * @ctxt: an XML parser context
5341 *
5342 * parse <!ENTITY declarations
5343 *
5344 * [70] EntityDecl ::= GEDecl | PEDecl
5345 *
5346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5347 *
5348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5349 *
5350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5351 *
5352 * [74] PEDef ::= EntityValue | ExternalID
5353 *
5354 * [76] NDataDecl ::= S 'NDATA' S Name
5355 *
5356 * [ VC: Notation Declared ]
5357 * The Name must match the declared name of a notation.
5358 */
5359
5360 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5361 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5362 const xmlChar *name = NULL;
5363 xmlChar *value = NULL;
5364 xmlChar *URI = NULL, *literal = NULL;
5365 const xmlChar *ndata = NULL;
5366 int isParameter = 0;
5367 xmlChar *orig = NULL;
5368
5369 /* GROW; done in the caller */
5370 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5371 int inputid = ctxt->input->id;
5372 SHRINK;
5373 SKIP(8);
5374 if (SKIP_BLANKS == 0) {
5375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 "Space required after '<!ENTITY'\n");
5377 }
5378
5379 if (RAW == '%') {
5380 NEXT;
5381 if (SKIP_BLANKS == 0) {
5382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5383 "Space required after '%%'\n");
5384 }
5385 isParameter = 1;
5386 }
5387
5388 name = xmlParseName(ctxt);
5389 if (name == NULL) {
5390 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5391 "xmlParseEntityDecl: no name\n");
5392 return;
5393 }
5394 if (xmlStrchr(name, ':') != NULL) {
5395 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396 "colons are forbidden from entities names '%s'\n",
5397 name, NULL, NULL);
5398 }
5399 if (SKIP_BLANKS == 0) {
5400 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5401 "Space required after the entity name\n");
5402 }
5403
5404 ctxt->instate = XML_PARSER_ENTITY_DECL;
5405 /*
5406 * handle the various case of definitions...
5407 */
5408 if (isParameter) {
5409 if ((RAW == '"') || (RAW == '\'')) {
5410 value = xmlParseEntityValue(ctxt, &orig);
5411 if (value) {
5412 if ((ctxt->sax != NULL) &&
5413 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5414 ctxt->sax->entityDecl(ctxt->userData, name,
5415 XML_INTERNAL_PARAMETER_ENTITY,
5416 NULL, NULL, value);
5417 }
5418 } else {
5419 URI = xmlParseExternalID(ctxt, &literal, 1);
5420 if ((URI == NULL) && (literal == NULL)) {
5421 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5422 }
5423 if (URI) {
5424 xmlURIPtr uri;
5425
5426 uri = xmlParseURI((const char *) URI);
5427 if (uri == NULL) {
5428 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5429 "Invalid URI: %s\n", URI);
5430 /*
5431 * This really ought to be a well formedness error
5432 * but the XML Core WG decided otherwise c.f. issue
5433 * E26 of the XML erratas.
5434 */
5435 } else {
5436 if (uri->fragment != NULL) {
5437 /*
5438 * Okay this is foolish to block those but not
5439 * invalid URIs.
5440 */
5441 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5442 } else {
5443 if ((ctxt->sax != NULL) &&
5444 (!ctxt->disableSAX) &&
5445 (ctxt->sax->entityDecl != NULL))
5446 ctxt->sax->entityDecl(ctxt->userData, name,
5447 XML_EXTERNAL_PARAMETER_ENTITY,
5448 literal, URI, NULL);
5449 }
5450 xmlFreeURI(uri);
5451 }
5452 }
5453 }
5454 } else {
5455 if ((RAW == '"') || (RAW == '\'')) {
5456 value = xmlParseEntityValue(ctxt, &orig);
5457 if ((ctxt->sax != NULL) &&
5458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5459 ctxt->sax->entityDecl(ctxt->userData, name,
5460 XML_INTERNAL_GENERAL_ENTITY,
5461 NULL, NULL, value);
5462 /*
5463 * For expat compatibility in SAX mode.
5464 */
5465 if ((ctxt->myDoc == NULL) ||
5466 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5467 if (ctxt->myDoc == NULL) {
5468 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5469 if (ctxt->myDoc == NULL) {
5470 xmlErrMemory(ctxt, "New Doc failed");
5471 return;
5472 }
5473 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5474 }
5475 if (ctxt->myDoc->intSubset == NULL)
5476 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5477 BAD_CAST "fake", NULL, NULL);
5478
5479 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5480 NULL, NULL, value);
5481 }
5482 } else {
5483 URI = xmlParseExternalID(ctxt, &literal, 1);
5484 if ((URI == NULL) && (literal == NULL)) {
5485 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5486 }
5487 if (URI) {
5488 xmlURIPtr uri;
5489
5490 uri = xmlParseURI((const char *)URI);
5491 if (uri == NULL) {
5492 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5493 "Invalid URI: %s\n", URI);
5494 /*
5495 * This really ought to be a well formedness error
5496 * but the XML Core WG decided otherwise c.f. issue
5497 * E26 of the XML erratas.
5498 */
5499 } else {
5500 if (uri->fragment != NULL) {
5501 /*
5502 * Okay this is foolish to block those but not
5503 * invalid URIs.
5504 */
5505 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5506 }
5507 xmlFreeURI(uri);
5508 }
5509 }
5510 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5511 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5512 "Space required before 'NDATA'\n");
5513 }
5514 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5515 SKIP(5);
5516 if (SKIP_BLANKS == 0) {
5517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5518 "Space required after 'NDATA'\n");
5519 }
5520 ndata = xmlParseName(ctxt);
5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5522 (ctxt->sax->unparsedEntityDecl != NULL))
5523 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5524 literal, URI, ndata);
5525 } else {
5526 if ((ctxt->sax != NULL) &&
5527 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5528 ctxt->sax->entityDecl(ctxt->userData, name,
5529 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5530 literal, URI, NULL);
5531 /*
5532 * For expat compatibility in SAX mode.
5533 * assuming the entity repalcement was asked for
5534 */
5535 if ((ctxt->replaceEntities != 0) &&
5536 ((ctxt->myDoc == NULL) ||
5537 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5538 if (ctxt->myDoc == NULL) {
5539 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5540 if (ctxt->myDoc == NULL) {
5541 xmlErrMemory(ctxt, "New Doc failed");
5542 return;
5543 }
5544 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5545 }
5546
5547 if (ctxt->myDoc->intSubset == NULL)
5548 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5549 BAD_CAST "fake", NULL, NULL);
5550 xmlSAX2EntityDecl(ctxt, name,
5551 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5552 literal, URI, NULL);
5553 }
5554 }
5555 }
5556 }
5557 if (ctxt->instate == XML_PARSER_EOF)
5558 goto done;
5559 SKIP_BLANKS;
5560 if (RAW != '>') {
5561 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5562 "xmlParseEntityDecl: entity %s not terminated\n", name);
5563 xmlHaltParser(ctxt);
5564 } else {
5565 if (inputid != ctxt->input->id) {
5566 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5567 "Entity declaration doesn't start and stop in"
5568 " the same entity\n");
5569 }
5570 NEXT;
5571 }
5572 if (orig != NULL) {
5573 /*
5574 * Ugly mechanism to save the raw entity value.
5575 */
5576 xmlEntityPtr cur = NULL;
5577
5578 if (isParameter) {
5579 if ((ctxt->sax != NULL) &&
5580 (ctxt->sax->getParameterEntity != NULL))
5581 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5582 } else {
5583 if ((ctxt->sax != NULL) &&
5584 (ctxt->sax->getEntity != NULL))
5585 cur = ctxt->sax->getEntity(ctxt->userData, name);
5586 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5587 cur = xmlSAX2GetEntity(ctxt, name);
5588 }
5589 }
5590 if ((cur != NULL) && (cur->orig == NULL)) {
5591 cur->orig = orig;
5592 orig = NULL;
5593 }
5594 }
5595
5596 done:
5597 if (value != NULL) xmlFree(value);
5598 if (URI != NULL) xmlFree(URI);
5599 if (literal != NULL) xmlFree(literal);
5600 if (orig != NULL) xmlFree(orig);
5601 }
5602 }
5603
5604 /**
5605 * xmlParseDefaultDecl:
5606 * @ctxt: an XML parser context
5607 * @value: Receive a possible fixed default value for the attribute
5608 *
5609 * Parse an attribute default declaration
5610 *
5611 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5612 *
5613 * [ VC: Required Attribute ]
5614 * if the default declaration is the keyword #REQUIRED, then the
5615 * attribute must be specified for all elements of the type in the
5616 * attribute-list declaration.
5617 *
5618 * [ VC: Attribute Default Legal ]
5619 * The declared default value must meet the lexical constraints of
5620 * the declared attribute type c.f. xmlValidateAttributeDecl()
5621 *
5622 * [ VC: Fixed Attribute Default ]
5623 * if an attribute has a default value declared with the #FIXED
5624 * keyword, instances of that attribute must match the default value.
5625 *
5626 * [ WFC: No < in Attribute Values ]
5627 * handled in xmlParseAttValue()
5628 *
5629 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5630 * or XML_ATTRIBUTE_FIXED.
5631 */
5632
5633 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5634 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5635 int val;
5636 xmlChar *ret;
5637
5638 *value = NULL;
5639 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5640 SKIP(9);
5641 return(XML_ATTRIBUTE_REQUIRED);
5642 }
5643 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5644 SKIP(8);
5645 return(XML_ATTRIBUTE_IMPLIED);
5646 }
5647 val = XML_ATTRIBUTE_NONE;
5648 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5649 SKIP(6);
5650 val = XML_ATTRIBUTE_FIXED;
5651 if (SKIP_BLANKS == 0) {
5652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5653 "Space required after '#FIXED'\n");
5654 }
5655 }
5656 ret = xmlParseAttValue(ctxt);
5657 ctxt->instate = XML_PARSER_DTD;
5658 if (ret == NULL) {
5659 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5660 "Attribute default value declaration error\n");
5661 } else
5662 *value = ret;
5663 return(val);
5664 }
5665
5666 /**
5667 * xmlParseNotationType:
5668 * @ctxt: an XML parser context
5669 *
5670 * parse an Notation attribute type.
5671 *
5672 * Note: the leading 'NOTATION' S part has already being parsed...
5673 *
5674 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5675 *
5676 * [ VC: Notation Attributes ]
5677 * Values of this type must match one of the notation names included
5678 * in the declaration; all notation names in the declaration must be declared.
5679 *
5680 * Returns: the notation attribute tree built while parsing
5681 */
5682
5683 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5684 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5685 const xmlChar *name;
5686 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5687
5688 if (RAW != '(') {
5689 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5690 return(NULL);
5691 }
5692 SHRINK;
5693 do {
5694 NEXT;
5695 SKIP_BLANKS;
5696 name = xmlParseName(ctxt);
5697 if (name == NULL) {
5698 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5699 "Name expected in NOTATION declaration\n");
5700 xmlFreeEnumeration(ret);
5701 return(NULL);
5702 }
5703 tmp = ret;
5704 while (tmp != NULL) {
5705 if (xmlStrEqual(name, tmp->name)) {
5706 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5707 "standalone: attribute notation value token %s duplicated\n",
5708 name, NULL);
5709 if (!xmlDictOwns(ctxt->dict, name))
5710 xmlFree((xmlChar *) name);
5711 break;
5712 }
5713 tmp = tmp->next;
5714 }
5715 if (tmp == NULL) {
5716 cur = xmlCreateEnumeration(name);
5717 if (cur == NULL) {
5718 xmlFreeEnumeration(ret);
5719 return(NULL);
5720 }
5721 if (last == NULL) ret = last = cur;
5722 else {
5723 last->next = cur;
5724 last = cur;
5725 }
5726 }
5727 SKIP_BLANKS;
5728 } while (RAW == '|');
5729 if (RAW != ')') {
5730 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5731 xmlFreeEnumeration(ret);
5732 return(NULL);
5733 }
5734 NEXT;
5735 return(ret);
5736 }
5737
5738 /**
5739 * xmlParseEnumerationType:
5740 * @ctxt: an XML parser context
5741 *
5742 * parse an Enumeration attribute type.
5743 *
5744 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5745 *
5746 * [ VC: Enumeration ]
5747 * Values of this type must match one of the Nmtoken tokens in
5748 * the declaration
5749 *
5750 * Returns: the enumeration attribute tree built while parsing
5751 */
5752
5753 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5754 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5755 xmlChar *name;
5756 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5757
5758 if (RAW != '(') {
5759 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5760 return(NULL);
5761 }
5762 SHRINK;
5763 do {
5764 NEXT;
5765 SKIP_BLANKS;
5766 name = xmlParseNmtoken(ctxt);
5767 if (name == NULL) {
5768 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5769 return(ret);
5770 }
5771 tmp = ret;
5772 while (tmp != NULL) {
5773 if (xmlStrEqual(name, tmp->name)) {
5774 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5775 "standalone: attribute enumeration value token %s duplicated\n",
5776 name, NULL);
5777 if (!xmlDictOwns(ctxt->dict, name))
5778 xmlFree(name);
5779 break;
5780 }
5781 tmp = tmp->next;
5782 }
5783 if (tmp == NULL) {
5784 cur = xmlCreateEnumeration(name);
5785 if (!xmlDictOwns(ctxt->dict, name))
5786 xmlFree(name);
5787 if (cur == NULL) {
5788 xmlFreeEnumeration(ret);
5789 return(NULL);
5790 }
5791 if (last == NULL) ret = last = cur;
5792 else {
5793 last->next = cur;
5794 last = cur;
5795 }
5796 }
5797 SKIP_BLANKS;
5798 } while (RAW == '|');
5799 if (RAW != ')') {
5800 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5801 return(ret);
5802 }
5803 NEXT;
5804 return(ret);
5805 }
5806
5807 /**
5808 * xmlParseEnumeratedType:
5809 * @ctxt: an XML parser context
5810 * @tree: the enumeration tree built while parsing
5811 *
5812 * parse an Enumerated attribute type.
5813 *
5814 * [57] EnumeratedType ::= NotationType | Enumeration
5815 *
5816 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5817 *
5818 *
5819 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5820 */
5821
5822 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5823 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5824 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5825 SKIP(8);
5826 if (SKIP_BLANKS == 0) {
5827 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5828 "Space required after 'NOTATION'\n");
5829 return(0);
5830 }
5831 *tree = xmlParseNotationType(ctxt);
5832 if (*tree == NULL) return(0);
5833 return(XML_ATTRIBUTE_NOTATION);
5834 }
5835 *tree = xmlParseEnumerationType(ctxt);
5836 if (*tree == NULL) return(0);
5837 return(XML_ATTRIBUTE_ENUMERATION);
5838 }
5839
5840 /**
5841 * xmlParseAttributeType:
5842 * @ctxt: an XML parser context
5843 * @tree: the enumeration tree built while parsing
5844 *
5845 * parse the Attribute list def for an element
5846 *
5847 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5848 *
5849 * [55] StringType ::= 'CDATA'
5850 *
5851 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5852 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5853 *
5854 * Validity constraints for attribute values syntax are checked in
5855 * xmlValidateAttributeValue()
5856 *
5857 * [ VC: ID ]
5858 * Values of type ID must match the Name production. A name must not
5859 * appear more than once in an XML document as a value of this type;
5860 * i.e., ID values must uniquely identify the elements which bear them.
5861 *
5862 * [ VC: One ID per Element Type ]
5863 * No element type may have more than one ID attribute specified.
5864 *
5865 * [ VC: ID Attribute Default ]
5866 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5867 *
5868 * [ VC: IDREF ]
5869 * Values of type IDREF must match the Name production, and values
5870 * of type IDREFS must match Names; each IDREF Name must match the value
5871 * of an ID attribute on some element in the XML document; i.e. IDREF
5872 * values must match the value of some ID attribute.
5873 *
5874 * [ VC: Entity Name ]
5875 * Values of type ENTITY must match the Name production, values
5876 * of type ENTITIES must match Names; each Entity Name must match the
5877 * name of an unparsed entity declared in the DTD.
5878 *
5879 * [ VC: Name Token ]
5880 * Values of type NMTOKEN must match the Nmtoken production; values
5881 * of type NMTOKENS must match Nmtokens.
5882 *
5883 * Returns the attribute type
5884 */
5885 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5886 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5887 SHRINK;
5888 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5889 SKIP(5);
5890 return(XML_ATTRIBUTE_CDATA);
5891 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5892 SKIP(6);
5893 return(XML_ATTRIBUTE_IDREFS);
5894 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5895 SKIP(5);
5896 return(XML_ATTRIBUTE_IDREF);
5897 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5898 SKIP(2);
5899 return(XML_ATTRIBUTE_ID);
5900 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5901 SKIP(6);
5902 return(XML_ATTRIBUTE_ENTITY);
5903 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5904 SKIP(8);
5905 return(XML_ATTRIBUTE_ENTITIES);
5906 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5907 SKIP(8);
5908 return(XML_ATTRIBUTE_NMTOKENS);
5909 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5910 SKIP(7);
5911 return(XML_ATTRIBUTE_NMTOKEN);
5912 }
5913 return(xmlParseEnumeratedType(ctxt, tree));
5914 }
5915
5916 /**
5917 * xmlParseAttributeListDecl:
5918 * @ctxt: an XML parser context
5919 *
5920 * : parse the Attribute list def for an element
5921 *
5922 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5923 *
5924 * [53] AttDef ::= S Name S AttType S DefaultDecl
5925 *
5926 */
5927 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5928 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5929 const xmlChar *elemName;
5930 const xmlChar *attrName;
5931 xmlEnumerationPtr tree;
5932
5933 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5934 int inputid = ctxt->input->id;
5935
5936 SKIP(9);
5937 if (SKIP_BLANKS == 0) {
5938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5939 "Space required after '<!ATTLIST'\n");
5940 }
5941 elemName = xmlParseName(ctxt);
5942 if (elemName == NULL) {
5943 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5944 "ATTLIST: no name for Element\n");
5945 return;
5946 }
5947 SKIP_BLANKS;
5948 GROW;
5949 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5950 int type;
5951 int def;
5952 xmlChar *defaultValue = NULL;
5953
5954 GROW;
5955 tree = NULL;
5956 attrName = xmlParseName(ctxt);
5957 if (attrName == NULL) {
5958 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5959 "ATTLIST: no name for Attribute\n");
5960 break;
5961 }
5962 GROW;
5963 if (SKIP_BLANKS == 0) {
5964 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5965 "Space required after the attribute name\n");
5966 break;
5967 }
5968
5969 type = xmlParseAttributeType(ctxt, &tree);
5970 if (type <= 0) {
5971 break;
5972 }
5973
5974 GROW;
5975 if (SKIP_BLANKS == 0) {
5976 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5977 "Space required after the attribute type\n");
5978 if (tree != NULL)
5979 xmlFreeEnumeration(tree);
5980 break;
5981 }
5982
5983 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5984 if (def <= 0) {
5985 if (defaultValue != NULL)
5986 xmlFree(defaultValue);
5987 if (tree != NULL)
5988 xmlFreeEnumeration(tree);
5989 break;
5990 }
5991 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5992 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5993
5994 GROW;
5995 if (RAW != '>') {
5996 if (SKIP_BLANKS == 0) {
5997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5998 "Space required after the attribute default value\n");
5999 if (defaultValue != NULL)
6000 xmlFree(defaultValue);
6001 if (tree != NULL)
6002 xmlFreeEnumeration(tree);
6003 break;
6004 }
6005 }
6006 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6007 (ctxt->sax->attributeDecl != NULL))
6008 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6009 type, def, defaultValue, tree);
6010 else if (tree != NULL)
6011 xmlFreeEnumeration(tree);
6012
6013 if ((ctxt->sax2) && (defaultValue != NULL) &&
6014 (def != XML_ATTRIBUTE_IMPLIED) &&
6015 (def != XML_ATTRIBUTE_REQUIRED)) {
6016 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6017 }
6018 if (ctxt->sax2) {
6019 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6020 }
6021 if (defaultValue != NULL)
6022 xmlFree(defaultValue);
6023 GROW;
6024 }
6025 if (RAW == '>') {
6026 if (inputid != ctxt->input->id) {
6027 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6028 "Attribute list declaration doesn't start and"
6029 " stop in the same entity\n");
6030 }
6031 NEXT;
6032 }
6033 }
6034 }
6035
6036 /**
6037 * xmlParseElementMixedContentDecl:
6038 * @ctxt: an XML parser context
6039 * @inputchk: the input used for the current entity, needed for boundary checks
6040 *
6041 * parse the declaration for a Mixed Element content
6042 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6043 *
6044 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6045 * '(' S? '#PCDATA' S? ')'
6046 *
6047 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6048 *
6049 * [ VC: No Duplicate Types ]
6050 * The same name must not appear more than once in a single
6051 * mixed-content declaration.
6052 *
6053 * returns: the list of the xmlElementContentPtr describing the element choices
6054 */
6055 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6056 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6057 xmlElementContentPtr ret = NULL, cur = NULL, n;
6058 const xmlChar *elem = NULL;
6059
6060 GROW;
6061 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6062 SKIP(7);
6063 SKIP_BLANKS;
6064 SHRINK;
6065 if (RAW == ')') {
6066 if (ctxt->input->id != inputchk) {
6067 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6068 "Element content declaration doesn't start and"
6069 " stop in the same entity\n");
6070 }
6071 NEXT;
6072 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6073 if (ret == NULL)
6074 return(NULL);
6075 if (RAW == '*') {
6076 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6077 NEXT;
6078 }
6079 return(ret);
6080 }
6081 if ((RAW == '(') || (RAW == '|')) {
6082 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6083 if (ret == NULL) return(NULL);
6084 }
6085 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6086 NEXT;
6087 if (elem == NULL) {
6088 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6089 if (ret == NULL) return(NULL);
6090 ret->c1 = cur;
6091 if (cur != NULL)
6092 cur->parent = ret;
6093 cur = ret;
6094 } else {
6095 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6096 if (n == NULL) return(NULL);
6097 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6098 if (n->c1 != NULL)
6099 n->c1->parent = n;
6100 cur->c2 = n;
6101 if (n != NULL)
6102 n->parent = cur;
6103 cur = n;
6104 }
6105 SKIP_BLANKS;
6106 elem = xmlParseName(ctxt);
6107 if (elem == NULL) {
6108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6109 "xmlParseElementMixedContentDecl : Name expected\n");
6110 xmlFreeDocElementContent(ctxt->myDoc, ret);
6111 return(NULL);
6112 }
6113 SKIP_BLANKS;
6114 GROW;
6115 }
6116 if ((RAW == ')') && (NXT(1) == '*')) {
6117 if (elem != NULL) {
6118 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6119 XML_ELEMENT_CONTENT_ELEMENT);
6120 if (cur->c2 != NULL)
6121 cur->c2->parent = cur;
6122 }
6123 if (ret != NULL)
6124 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 if (ctxt->input->id != inputchk) {
6126 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6127 "Element content declaration doesn't start and"
6128 " stop in the same entity\n");
6129 }
6130 SKIP(2);
6131 } else {
6132 xmlFreeDocElementContent(ctxt->myDoc, ret);
6133 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6134 return(NULL);
6135 }
6136
6137 } else {
6138 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6139 }
6140 return(ret);
6141 }
6142
6143 /**
6144 * xmlParseElementChildrenContentDeclPriv:
6145 * @ctxt: an XML parser context
6146 * @inputchk: the input used for the current entity, needed for boundary checks
6147 * @depth: the level of recursion
6148 *
6149 * parse the declaration for a Mixed Element content
6150 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6151 *
6152 *
6153 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6154 *
6155 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6156 *
6157 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6158 *
6159 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6160 *
6161 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6162 * TODO Parameter-entity replacement text must be properly nested
6163 * with parenthesized groups. That is to say, if either of the
6164 * opening or closing parentheses in a choice, seq, or Mixed
6165 * construct is contained in the replacement text for a parameter
6166 * entity, both must be contained in the same replacement text. For
6167 * interoperability, if a parameter-entity reference appears in a
6168 * choice, seq, or Mixed construct, its replacement text should not
6169 * be empty, and neither the first nor last non-blank character of
6170 * the replacement text should be a connector (| or ,).
6171 *
6172 * Returns the tree of xmlElementContentPtr describing the element
6173 * hierarchy.
6174 */
6175 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6176 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6177 int depth) {
6178 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6179 const xmlChar *elem;
6180 xmlChar type = 0;
6181
6182 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6183 (depth > 2048)) {
6184 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6185 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6186 depth);
6187 return(NULL);
6188 }
6189 SKIP_BLANKS;
6190 GROW;
6191 if (RAW == '(') {
6192 int inputid = ctxt->input->id;
6193
6194 /* Recurse on first child */
6195 NEXT;
6196 SKIP_BLANKS;
6197 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6198 depth + 1);
6199 SKIP_BLANKS;
6200 GROW;
6201 } else {
6202 elem = xmlParseName(ctxt);
6203 if (elem == NULL) {
6204 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6205 return(NULL);
6206 }
6207 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6208 if (cur == NULL) {
6209 xmlErrMemory(ctxt, NULL);
6210 return(NULL);
6211 }
6212 GROW;
6213 if (RAW == '?') {
6214 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6215 NEXT;
6216 } else if (RAW == '*') {
6217 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6218 NEXT;
6219 } else if (RAW == '+') {
6220 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6221 NEXT;
6222 } else {
6223 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6224 }
6225 GROW;
6226 }
6227 SKIP_BLANKS;
6228 SHRINK;
6229 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6230 /*
6231 * Each loop we parse one separator and one element.
6232 */
6233 if (RAW == ',') {
6234 if (type == 0) type = CUR;
6235
6236 /*
6237 * Detect "Name | Name , Name" error
6238 */
6239 else if (type != CUR) {
6240 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6241 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6242 type);
6243 if ((last != NULL) && (last != ret))
6244 xmlFreeDocElementContent(ctxt->myDoc, last);
6245 if (ret != NULL)
6246 xmlFreeDocElementContent(ctxt->myDoc, ret);
6247 return(NULL);
6248 }
6249 NEXT;
6250
6251 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6252 if (op == NULL) {
6253 if ((last != NULL) && (last != ret))
6254 xmlFreeDocElementContent(ctxt->myDoc, last);
6255 xmlFreeDocElementContent(ctxt->myDoc, ret);
6256 return(NULL);
6257 }
6258 if (last == NULL) {
6259 op->c1 = ret;
6260 if (ret != NULL)
6261 ret->parent = op;
6262 ret = cur = op;
6263 } else {
6264 cur->c2 = op;
6265 if (op != NULL)
6266 op->parent = cur;
6267 op->c1 = last;
6268 if (last != NULL)
6269 last->parent = op;
6270 cur =op;
6271 last = NULL;
6272 }
6273 } else if (RAW == '|') {
6274 if (type == 0) type = CUR;
6275
6276 /*
6277 * Detect "Name , Name | Name" error
6278 */
6279 else if (type != CUR) {
6280 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6281 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6282 type);
6283 if ((last != NULL) && (last != ret))
6284 xmlFreeDocElementContent(ctxt->myDoc, last);
6285 if (ret != NULL)
6286 xmlFreeDocElementContent(ctxt->myDoc, ret);
6287 return(NULL);
6288 }
6289 NEXT;
6290
6291 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6292 if (op == NULL) {
6293 if ((last != NULL) && (last != ret))
6294 xmlFreeDocElementContent(ctxt->myDoc, last);
6295 if (ret != NULL)
6296 xmlFreeDocElementContent(ctxt->myDoc, ret);
6297 return(NULL);
6298 }
6299 if (last == NULL) {
6300 op->c1 = ret;
6301 if (ret != NULL)
6302 ret->parent = op;
6303 ret = cur = op;
6304 } else {
6305 cur->c2 = op;
6306 if (op != NULL)
6307 op->parent = cur;
6308 op->c1 = last;
6309 if (last != NULL)
6310 last->parent = op;
6311 cur =op;
6312 last = NULL;
6313 }
6314 } else {
6315 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6316 if ((last != NULL) && (last != ret))
6317 xmlFreeDocElementContent(ctxt->myDoc, last);
6318 if (ret != NULL)
6319 xmlFreeDocElementContent(ctxt->myDoc, ret);
6320 return(NULL);
6321 }
6322 GROW;
6323 SKIP_BLANKS;
6324 GROW;
6325 if (RAW == '(') {
6326 int inputid = ctxt->input->id;
6327 /* Recurse on second child */
6328 NEXT;
6329 SKIP_BLANKS;
6330 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6331 depth + 1);
6332 SKIP_BLANKS;
6333 } else {
6334 elem = xmlParseName(ctxt);
6335 if (elem == NULL) {
6336 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6337 if (ret != NULL)
6338 xmlFreeDocElementContent(ctxt->myDoc, ret);
6339 return(NULL);
6340 }
6341 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6342 if (last == NULL) {
6343 if (ret != NULL)
6344 xmlFreeDocElementContent(ctxt->myDoc, ret);
6345 return(NULL);
6346 }
6347 if (RAW == '?') {
6348 last->ocur = XML_ELEMENT_CONTENT_OPT;
6349 NEXT;
6350 } else if (RAW == '*') {
6351 last->ocur = XML_ELEMENT_CONTENT_MULT;
6352 NEXT;
6353 } else if (RAW == '+') {
6354 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6355 NEXT;
6356 } else {
6357 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6358 }
6359 }
6360 SKIP_BLANKS;
6361 GROW;
6362 }
6363 if ((cur != NULL) && (last != NULL)) {
6364 cur->c2 = last;
6365 if (last != NULL)
6366 last->parent = cur;
6367 }
6368 if (ctxt->input->id != inputchk) {
6369 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6370 "Element content declaration doesn't start and stop in"
6371 " the same entity\n");
6372 }
6373 NEXT;
6374 if (RAW == '?') {
6375 if (ret != NULL) {
6376 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6377 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6379 else
6380 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6381 }
6382 NEXT;
6383 } else if (RAW == '*') {
6384 if (ret != NULL) {
6385 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6386 cur = ret;
6387 /*
6388 * Some normalization:
6389 * (a | b* | c?)* == (a | b | c)*
6390 */
6391 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6392 if ((cur->c1 != NULL) &&
6393 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6394 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6395 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6396 if ((cur->c2 != NULL) &&
6397 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6399 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 cur = cur->c2;
6401 }
6402 }
6403 NEXT;
6404 } else if (RAW == '+') {
6405 if (ret != NULL) {
6406 int found = 0;
6407
6408 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6409 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411 else
6412 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6413 /*
6414 * Some normalization:
6415 * (a | b*)+ == (a | b)*
6416 * (a | b?)+ == (a | b)*
6417 */
6418 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6419 if ((cur->c1 != NULL) &&
6420 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6421 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6422 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6423 found = 1;
6424 }
6425 if ((cur->c2 != NULL) &&
6426 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6427 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6428 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6429 found = 1;
6430 }
6431 cur = cur->c2;
6432 }
6433 if (found)
6434 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6435 }
6436 NEXT;
6437 }
6438 return(ret);
6439 }
6440
6441 /**
6442 * xmlParseElementChildrenContentDecl:
6443 * @ctxt: an XML parser context
6444 * @inputchk: the input used for the current entity, needed for boundary checks
6445 *
6446 * parse the declaration for a Mixed Element content
6447 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6448 *
6449 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6450 *
6451 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6452 *
6453 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6454 *
6455 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6456 *
6457 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6458 * TODO Parameter-entity replacement text must be properly nested
6459 * with parenthesized groups. That is to say, if either of the
6460 * opening or closing parentheses in a choice, seq, or Mixed
6461 * construct is contained in the replacement text for a parameter
6462 * entity, both must be contained in the same replacement text. For
6463 * interoperability, if a parameter-entity reference appears in a
6464 * choice, seq, or Mixed construct, its replacement text should not
6465 * be empty, and neither the first nor last non-blank character of
6466 * the replacement text should be a connector (| or ,).
6467 *
6468 * Returns the tree of xmlElementContentPtr describing the element
6469 * hierarchy.
6470 */
6471 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6472 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6473 /* stub left for API/ABI compat */
6474 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6475 }
6476
6477 /**
6478 * xmlParseElementContentDecl:
6479 * @ctxt: an XML parser context
6480 * @name: the name of the element being defined.
6481 * @result: the Element Content pointer will be stored here if any
6482 *
6483 * parse the declaration for an Element content either Mixed or Children,
6484 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6485 *
6486 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6487 *
6488 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6489 */
6490
6491 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6492 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6493 xmlElementContentPtr *result) {
6494
6495 xmlElementContentPtr tree = NULL;
6496 int inputid = ctxt->input->id;
6497 int res;
6498
6499 *result = NULL;
6500
6501 if (RAW != '(') {
6502 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6503 "xmlParseElementContentDecl : %s '(' expected\n", name);
6504 return(-1);
6505 }
6506 NEXT;
6507 GROW;
6508 if (ctxt->instate == XML_PARSER_EOF)
6509 return(-1);
6510 SKIP_BLANKS;
6511 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6512 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6513 res = XML_ELEMENT_TYPE_MIXED;
6514 } else {
6515 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6516 res = XML_ELEMENT_TYPE_ELEMENT;
6517 }
6518 SKIP_BLANKS;
6519 *result = tree;
6520 return(res);
6521 }
6522
6523 /**
6524 * xmlParseElementDecl:
6525 * @ctxt: an XML parser context
6526 *
6527 * parse an Element declaration.
6528 *
6529 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6530 *
6531 * [ VC: Unique Element Type Declaration ]
6532 * No element type may be declared more than once
6533 *
6534 * Returns the type of the element, or -1 in case of error
6535 */
6536 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6537 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6538 const xmlChar *name;
6539 int ret = -1;
6540 xmlElementContentPtr content = NULL;
6541
6542 /* GROW; done in the caller */
6543 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6544 int inputid = ctxt->input->id;
6545
6546 SKIP(9);
6547 if (SKIP_BLANKS == 0) {
6548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6549 "Space required after 'ELEMENT'\n");
6550 return(-1);
6551 }
6552 name = xmlParseName(ctxt);
6553 if (name == NULL) {
6554 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6555 "xmlParseElementDecl: no name for Element\n");
6556 return(-1);
6557 }
6558 if (SKIP_BLANKS == 0) {
6559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6560 "Space required after the element name\n");
6561 }
6562 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6563 SKIP(5);
6564 /*
6565 * Element must always be empty.
6566 */
6567 ret = XML_ELEMENT_TYPE_EMPTY;
6568 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6569 (NXT(2) == 'Y')) {
6570 SKIP(3);
6571 /*
6572 * Element is a generic container.
6573 */
6574 ret = XML_ELEMENT_TYPE_ANY;
6575 } else if (RAW == '(') {
6576 ret = xmlParseElementContentDecl(ctxt, name, &content);
6577 } else {
6578 /*
6579 * [ WFC: PEs in Internal Subset ] error handling.
6580 */
6581 if ((RAW == '%') && (ctxt->external == 0) &&
6582 (ctxt->inputNr == 1)) {
6583 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6584 "PEReference: forbidden within markup decl in internal subset\n");
6585 } else {
6586 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6587 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6588 }
6589 return(-1);
6590 }
6591
6592 SKIP_BLANKS;
6593
6594 if (RAW != '>') {
6595 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6596 if (content != NULL) {
6597 xmlFreeDocElementContent(ctxt->myDoc, content);
6598 }
6599 } else {
6600 if (inputid != ctxt->input->id) {
6601 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6602 "Element declaration doesn't start and stop in"
6603 " the same entity\n");
6604 }
6605
6606 NEXT;
6607 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6608 (ctxt->sax->elementDecl != NULL)) {
6609 if (content != NULL)
6610 content->parent = NULL;
6611 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6612 content);
6613 if ((content != NULL) && (content->parent == NULL)) {
6614 /*
6615 * this is a trick: if xmlAddElementDecl is called,
6616 * instead of copying the full tree it is plugged directly
6617 * if called from the parser. Avoid duplicating the
6618 * interfaces or change the API/ABI
6619 */
6620 xmlFreeDocElementContent(ctxt->myDoc, content);
6621 }
6622 } else if (content != NULL) {
6623 xmlFreeDocElementContent(ctxt->myDoc, content);
6624 }
6625 }
6626 }
6627 return(ret);
6628 }
6629
6630 /**
6631 * xmlParseConditionalSections
6632 * @ctxt: an XML parser context
6633 *
6634 * [61] conditionalSect ::= includeSect | ignoreSect
6635 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6636 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6637 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6638 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6639 */
6640
6641 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6642 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6643 int id = ctxt->input->id;
6644
6645 SKIP(3);
6646 SKIP_BLANKS;
6647 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6648 SKIP(7);
6649 SKIP_BLANKS;
6650 if (RAW != '[') {
6651 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6652 xmlHaltParser(ctxt);
6653 return;
6654 } else {
6655 if (ctxt->input->id != id) {
6656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657 "All markup of the conditional section is not"
6658 " in the same entity\n");
6659 }
6660 NEXT;
6661 }
6662 if (xmlParserDebugEntities) {
6663 if ((ctxt->input != NULL) && (ctxt->input->filename))
6664 xmlGenericError(xmlGenericErrorContext,
6665 "%s(%d): ", ctxt->input->filename,
6666 ctxt->input->line);
6667 xmlGenericError(xmlGenericErrorContext,
6668 "Entering INCLUDE Conditional Section\n");
6669 }
6670
6671 SKIP_BLANKS;
6672 GROW;
6673 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6674 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6675 const xmlChar *check = CUR_PTR;
6676 unsigned int cons = ctxt->input->consumed;
6677
6678 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6679 xmlParseConditionalSections(ctxt);
6680 } else
6681 xmlParseMarkupDecl(ctxt);
6682
6683 SKIP_BLANKS;
6684 GROW;
6685
6686 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6687 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6688 xmlHaltParser(ctxt);
6689 break;
6690 }
6691 }
6692 if (xmlParserDebugEntities) {
6693 if ((ctxt->input != NULL) && (ctxt->input->filename))
6694 xmlGenericError(xmlGenericErrorContext,
6695 "%s(%d): ", ctxt->input->filename,
6696 ctxt->input->line);
6697 xmlGenericError(xmlGenericErrorContext,
6698 "Leaving INCLUDE Conditional Section\n");
6699 }
6700
6701 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6702 int state;
6703 xmlParserInputState instate;
6704 int depth = 0;
6705
6706 SKIP(6);
6707 SKIP_BLANKS;
6708 if (RAW != '[') {
6709 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6710 xmlHaltParser(ctxt);
6711 return;
6712 } else {
6713 if (ctxt->input->id != id) {
6714 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6715 "All markup of the conditional section is not"
6716 " in the same entity\n");
6717 }
6718 NEXT;
6719 }
6720 if (xmlParserDebugEntities) {
6721 if ((ctxt->input != NULL) && (ctxt->input->filename))
6722 xmlGenericError(xmlGenericErrorContext,
6723 "%s(%d): ", ctxt->input->filename,
6724 ctxt->input->line);
6725 xmlGenericError(xmlGenericErrorContext,
6726 "Entering IGNORE Conditional Section\n");
6727 }
6728
6729 /*
6730 * Parse up to the end of the conditional section
6731 * But disable SAX event generating DTD building in the meantime
6732 */
6733 state = ctxt->disableSAX;
6734 instate = ctxt->instate;
6735 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6736 ctxt->instate = XML_PARSER_IGNORE;
6737
6738 while (((depth >= 0) && (RAW != 0)) &&
6739 (ctxt->instate != XML_PARSER_EOF)) {
6740 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6741 depth++;
6742 SKIP(3);
6743 continue;
6744 }
6745 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6746 if (--depth >= 0) SKIP(3);
6747 continue;
6748 }
6749 NEXT;
6750 continue;
6751 }
6752
6753 ctxt->disableSAX = state;
6754 ctxt->instate = instate;
6755
6756 if (xmlParserDebugEntities) {
6757 if ((ctxt->input != NULL) && (ctxt->input->filename))
6758 xmlGenericError(xmlGenericErrorContext,
6759 "%s(%d): ", ctxt->input->filename,
6760 ctxt->input->line);
6761 xmlGenericError(xmlGenericErrorContext,
6762 "Leaving IGNORE Conditional Section\n");
6763 }
6764
6765 } else {
6766 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6767 xmlHaltParser(ctxt);
6768 return;
6769 }
6770
6771 if (RAW == 0)
6772 SHRINK;
6773
6774 if (RAW == 0) {
6775 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6776 } else {
6777 if (ctxt->input->id != id) {
6778 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6779 "All markup of the conditional section is not in"
6780 " the same entity\n");
6781 }
6782 if ((ctxt-> instate != XML_PARSER_EOF) &&
6783 ((ctxt->input->cur + 3) <= ctxt->input->end))
6784 SKIP(3);
6785 }
6786 }
6787
6788 /**
6789 * xmlParseMarkupDecl:
6790 * @ctxt: an XML parser context
6791 *
6792 * parse Markup declarations
6793 *
6794 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6795 * NotationDecl | PI | Comment
6796 *
6797 * [ VC: Proper Declaration/PE Nesting ]
6798 * Parameter-entity replacement text must be properly nested with
6799 * markup declarations. That is to say, if either the first character
6800 * or the last character of a markup declaration (markupdecl above) is
6801 * contained in the replacement text for a parameter-entity reference,
6802 * both must be contained in the same replacement text.
6803 *
6804 * [ WFC: PEs in Internal Subset ]
6805 * In the internal DTD subset, parameter-entity references can occur
6806 * only where markup declarations can occur, not within markup declarations.
6807 * (This does not apply to references that occur in external parameter
6808 * entities or to the external subset.)
6809 */
6810 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6811 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6812 GROW;
6813 if (CUR == '<') {
6814 if (NXT(1) == '!') {
6815 switch (NXT(2)) {
6816 case 'E':
6817 if (NXT(3) == 'L')
6818 xmlParseElementDecl(ctxt);
6819 else if (NXT(3) == 'N')
6820 xmlParseEntityDecl(ctxt);
6821 break;
6822 case 'A':
6823 xmlParseAttributeListDecl(ctxt);
6824 break;
6825 case 'N':
6826 xmlParseNotationDecl(ctxt);
6827 break;
6828 case '-':
6829 xmlParseComment(ctxt);
6830 break;
6831 default:
6832 /* there is an error but it will be detected later */
6833 break;
6834 }
6835 } else if (NXT(1) == '?') {
6836 xmlParsePI(ctxt);
6837 }
6838 }
6839
6840 /*
6841 * detect requirement to exit there and act accordingly
6842 * and avoid having instate overriden later on
6843 */
6844 if (ctxt->instate == XML_PARSER_EOF)
6845 return;
6846
6847 /*
6848 * Conditional sections are allowed from entities included
6849 * by PE References in the internal subset.
6850 */
6851 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6852 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6853 xmlParseConditionalSections(ctxt);
6854 }
6855 }
6856
6857 ctxt->instate = XML_PARSER_DTD;
6858 }
6859
6860 /**
6861 * xmlParseTextDecl:
6862 * @ctxt: an XML parser context
6863 *
6864 * parse an XML declaration header for external entities
6865 *
6866 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6867 */
6868
6869 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6870 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6871 xmlChar *version;
6872 const xmlChar *encoding;
6873
6874 /*
6875 * We know that '<?xml' is here.
6876 */
6877 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6878 SKIP(5);
6879 } else {
6880 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6881 return;
6882 }
6883
6884 if (SKIP_BLANKS == 0) {
6885 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6886 "Space needed after '<?xml'\n");
6887 }
6888
6889 /*
6890 * We may have the VersionInfo here.
6891 */
6892 version = xmlParseVersionInfo(ctxt);
6893 if (version == NULL)
6894 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6895 else {
6896 if (SKIP_BLANKS == 0) {
6897 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6898 "Space needed here\n");
6899 }
6900 }
6901 ctxt->input->version = version;
6902
6903 /*
6904 * We must have the encoding declaration
6905 */
6906 encoding = xmlParseEncodingDecl(ctxt);
6907 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6908 /*
6909 * The XML REC instructs us to stop parsing right here
6910 */
6911 return;
6912 }
6913 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6914 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6915 "Missing encoding in text declaration\n");
6916 }
6917
6918 SKIP_BLANKS;
6919 if ((RAW == '?') && (NXT(1) == '>')) {
6920 SKIP(2);
6921 } else if (RAW == '>') {
6922 /* Deprecated old WD ... */
6923 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6924 NEXT;
6925 } else {
6926 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6927 MOVETO_ENDTAG(CUR_PTR);
6928 NEXT;
6929 }
6930 }
6931
6932 /**
6933 * xmlParseExternalSubset:
6934 * @ctxt: an XML parser context
6935 * @ExternalID: the external identifier
6936 * @SystemID: the system identifier (or URL)
6937 *
6938 * parse Markup declarations from an external subset
6939 *
6940 * [30] extSubset ::= textDecl? extSubsetDecl
6941 *
6942 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6943 */
6944 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6945 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6946 const xmlChar *SystemID) {
6947 xmlDetectSAX2(ctxt);
6948 GROW;
6949
6950 if ((ctxt->encoding == NULL) &&
6951 (ctxt->input->end - ctxt->input->cur >= 4)) {
6952 xmlChar start[4];
6953 xmlCharEncoding enc;
6954
6955 start[0] = RAW;
6956 start[1] = NXT(1);
6957 start[2] = NXT(2);
6958 start[3] = NXT(3);
6959 enc = xmlDetectCharEncoding(start, 4);
6960 if (enc != XML_CHAR_ENCODING_NONE)
6961 xmlSwitchEncoding(ctxt, enc);
6962 }
6963
6964 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6965 xmlParseTextDecl(ctxt);
6966 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6967 /*
6968 * The XML REC instructs us to stop parsing right here
6969 */
6970 xmlHaltParser(ctxt);
6971 return;
6972 }
6973 }
6974 if (ctxt->myDoc == NULL) {
6975 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6976 if (ctxt->myDoc == NULL) {
6977 xmlErrMemory(ctxt, "New Doc failed");
6978 return;
6979 }
6980 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6981 }
6982 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6983 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6984
6985 ctxt->instate = XML_PARSER_DTD;
6986 ctxt->external = 1;
6987 SKIP_BLANKS;
6988 while (((RAW == '<') && (NXT(1) == '?')) ||
6989 ((RAW == '<') && (NXT(1) == '!')) ||
6990 (RAW == '%')) {
6991 const xmlChar *check = CUR_PTR;
6992 unsigned int cons = ctxt->input->consumed;
6993
6994 GROW;
6995 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6996 xmlParseConditionalSections(ctxt);
6997 } else
6998 xmlParseMarkupDecl(ctxt);
6999 SKIP_BLANKS;
7000
7001 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7002 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7003 break;
7004 }
7005 }
7006
7007 if (RAW != 0) {
7008 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7009 }
7010
7011 }
7012
7013 /**
7014 * xmlParseReference:
7015 * @ctxt: an XML parser context
7016 *
7017 * parse and handle entity references in content, depending on the SAX
7018 * interface, this may end-up in a call to character() if this is a
7019 * CharRef, a predefined entity, if there is no reference() callback.
7020 * or if the parser was asked to switch to that mode.
7021 *
7022 * [67] Reference ::= EntityRef | CharRef
7023 */
7024 void
xmlParseReference(xmlParserCtxtPtr ctxt)7025 xmlParseReference(xmlParserCtxtPtr ctxt) {
7026 xmlEntityPtr ent;
7027 xmlChar *val;
7028 int was_checked;
7029 xmlNodePtr list = NULL;
7030 xmlParserErrors ret = XML_ERR_OK;
7031
7032
7033 if (RAW != '&')
7034 return;
7035
7036 /*
7037 * Simple case of a CharRef
7038 */
7039 if (NXT(1) == '#') {
7040 int i = 0;
7041 xmlChar out[12];
7042 int hex = NXT(2);
7043 int value = xmlParseCharRef(ctxt);
7044
7045 if (value == 0)
7046 return;
7047 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7048 /*
7049 * So we are using non-UTF-8 buffers
7050 * Check that the char fit on 8bits, if not
7051 * generate a CharRef.
7052 */
7053 if (value <= 0xFF) {
7054 out[0] = value;
7055 out[1] = 0;
7056 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7057 (!ctxt->disableSAX))
7058 ctxt->sax->characters(ctxt->userData, out, 1);
7059 } else {
7060 if ((hex == 'x') || (hex == 'X'))
7061 snprintf((char *)out, sizeof(out), "#x%X", value);
7062 else
7063 snprintf((char *)out, sizeof(out), "#%d", value);
7064 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7065 (!ctxt->disableSAX))
7066 ctxt->sax->reference(ctxt->userData, out);
7067 }
7068 } else {
7069 /*
7070 * Just encode the value in UTF-8
7071 */
7072 COPY_BUF(0 ,out, i, value);
7073 out[i] = 0;
7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075 (!ctxt->disableSAX))
7076 ctxt->sax->characters(ctxt->userData, out, i);
7077 }
7078 return;
7079 }
7080
7081 /*
7082 * We are seeing an entity reference
7083 */
7084 ent = xmlParseEntityRef(ctxt);
7085 if (ent == NULL) return;
7086 if (!ctxt->wellFormed)
7087 return;
7088 was_checked = ent->checked;
7089
7090 /* special case of predefined entities */
7091 if ((ent->name == NULL) ||
7092 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7093 val = ent->content;
7094 if (val == NULL) return;
7095 /*
7096 * inline the entity.
7097 */
7098 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7099 (!ctxt->disableSAX))
7100 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7101 return;
7102 }
7103
7104 /*
7105 * The first reference to the entity trigger a parsing phase
7106 * where the ent->children is filled with the result from
7107 * the parsing.
7108 * Note: external parsed entities will not be loaded, it is not
7109 * required for a non-validating parser, unless the parsing option
7110 * of validating, or substituting entities were given. Doing so is
7111 * far more secure as the parser will only process data coming from
7112 * the document entity by default.
7113 */
7114 if (((ent->checked == 0) ||
7115 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7116 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7117 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7118 unsigned long oldnbent = ctxt->nbentities;
7119
7120 /*
7121 * This is a bit hackish but this seems the best
7122 * way to make sure both SAX and DOM entity support
7123 * behaves okay.
7124 */
7125 void *user_data;
7126 if (ctxt->userData == ctxt)
7127 user_data = NULL;
7128 else
7129 user_data = ctxt->userData;
7130
7131 /*
7132 * Check that this entity is well formed
7133 * 4.3.2: An internal general parsed entity is well-formed
7134 * if its replacement text matches the production labeled
7135 * content.
7136 */
7137 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7138 ctxt->depth++;
7139 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7140 user_data, &list);
7141 ctxt->depth--;
7142
7143 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7144 ctxt->depth++;
7145 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7146 user_data, ctxt->depth, ent->URI,
7147 ent->ExternalID, &list);
7148 ctxt->depth--;
7149 } else {
7150 ret = XML_ERR_ENTITY_PE_INTERNAL;
7151 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7152 "invalid entity type found\n", NULL);
7153 }
7154
7155 /*
7156 * Store the number of entities needing parsing for this entity
7157 * content and do checkings
7158 */
7159 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7160 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7161 ent->checked |= 1;
7162 if (ret == XML_ERR_ENTITY_LOOP) {
7163 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7164 xmlFreeNodeList(list);
7165 return;
7166 }
7167 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7168 xmlFreeNodeList(list);
7169 return;
7170 }
7171
7172 if ((ret == XML_ERR_OK) && (list != NULL)) {
7173 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7174 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7175 (ent->children == NULL)) {
7176 ent->children = list;
7177 if (ctxt->replaceEntities) {
7178 /*
7179 * Prune it directly in the generated document
7180 * except for single text nodes.
7181 */
7182 if (((list->type == XML_TEXT_NODE) &&
7183 (list->next == NULL)) ||
7184 (ctxt->parseMode == XML_PARSE_READER)) {
7185 list->parent = (xmlNodePtr) ent;
7186 list = NULL;
7187 ent->owner = 1;
7188 } else {
7189 ent->owner = 0;
7190 while (list != NULL) {
7191 list->parent = (xmlNodePtr) ctxt->node;
7192 list->doc = ctxt->myDoc;
7193 if (list->next == NULL)
7194 ent->last = list;
7195 list = list->next;
7196 }
7197 list = ent->children;
7198 #ifdef LIBXML_LEGACY_ENABLED
7199 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7200 xmlAddEntityReference(ent, list, NULL);
7201 #endif /* LIBXML_LEGACY_ENABLED */
7202 }
7203 } else {
7204 ent->owner = 1;
7205 while (list != NULL) {
7206 list->parent = (xmlNodePtr) ent;
7207 xmlSetTreeDoc(list, ent->doc);
7208 if (list->next == NULL)
7209 ent->last = list;
7210 list = list->next;
7211 }
7212 }
7213 } else {
7214 xmlFreeNodeList(list);
7215 list = NULL;
7216 }
7217 } else if ((ret != XML_ERR_OK) &&
7218 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7219 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7220 "Entity '%s' failed to parse\n", ent->name);
7221 if (ent->content != NULL)
7222 ent->content[0] = 0;
7223 xmlParserEntityCheck(ctxt, 0, ent, 0);
7224 } else if (list != NULL) {
7225 xmlFreeNodeList(list);
7226 list = NULL;
7227 }
7228 if (ent->checked == 0)
7229 ent->checked = 2;
7230
7231 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7232 was_checked = 0;
7233 } else if (ent->checked != 1) {
7234 ctxt->nbentities += ent->checked / 2;
7235 }
7236
7237 /*
7238 * Now that the entity content has been gathered
7239 * provide it to the application, this can take different forms based
7240 * on the parsing modes.
7241 */
7242 if (ent->children == NULL) {
7243 /*
7244 * Probably running in SAX mode and the callbacks don't
7245 * build the entity content. So unless we already went
7246 * though parsing for first checking go though the entity
7247 * content to generate callbacks associated to the entity
7248 */
7249 if (was_checked != 0) {
7250 void *user_data;
7251 /*
7252 * This is a bit hackish but this seems the best
7253 * way to make sure both SAX and DOM entity support
7254 * behaves okay.
7255 */
7256 if (ctxt->userData == ctxt)
7257 user_data = NULL;
7258 else
7259 user_data = ctxt->userData;
7260
7261 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7262 ctxt->depth++;
7263 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7264 ent->content, user_data, NULL);
7265 ctxt->depth--;
7266 } else if (ent->etype ==
7267 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7268 ctxt->depth++;
7269 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7270 ctxt->sax, user_data, ctxt->depth,
7271 ent->URI, ent->ExternalID, NULL);
7272 ctxt->depth--;
7273 } else {
7274 ret = XML_ERR_ENTITY_PE_INTERNAL;
7275 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7276 "invalid entity type found\n", NULL);
7277 }
7278 if (ret == XML_ERR_ENTITY_LOOP) {
7279 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7280 return;
7281 }
7282 }
7283 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7284 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7285 /*
7286 * Entity reference callback comes second, it's somewhat
7287 * superfluous but a compatibility to historical behaviour
7288 */
7289 ctxt->sax->reference(ctxt->userData, ent->name);
7290 }
7291 return;
7292 }
7293
7294 /*
7295 * If we didn't get any children for the entity being built
7296 */
7297 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7298 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7299 /*
7300 * Create a node.
7301 */
7302 ctxt->sax->reference(ctxt->userData, ent->name);
7303 return;
7304 }
7305
7306 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7307 /*
7308 * There is a problem on the handling of _private for entities
7309 * (bug 155816): Should we copy the content of the field from
7310 * the entity (possibly overwriting some value set by the user
7311 * when a copy is created), should we leave it alone, or should
7312 * we try to take care of different situations? The problem
7313 * is exacerbated by the usage of this field by the xmlReader.
7314 * To fix this bug, we look at _private on the created node
7315 * and, if it's NULL, we copy in whatever was in the entity.
7316 * If it's not NULL we leave it alone. This is somewhat of a
7317 * hack - maybe we should have further tests to determine
7318 * what to do.
7319 */
7320 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7321 /*
7322 * Seems we are generating the DOM content, do
7323 * a simple tree copy for all references except the first
7324 * In the first occurrence list contains the replacement.
7325 */
7326 if (((list == NULL) && (ent->owner == 0)) ||
7327 (ctxt->parseMode == XML_PARSE_READER)) {
7328 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7329
7330 /*
7331 * We are copying here, make sure there is no abuse
7332 */
7333 ctxt->sizeentcopy += ent->length + 5;
7334 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7335 return;
7336
7337 /*
7338 * when operating on a reader, the entities definitions
7339 * are always owning the entities subtree.
7340 if (ctxt->parseMode == XML_PARSE_READER)
7341 ent->owner = 1;
7342 */
7343
7344 cur = ent->children;
7345 while (cur != NULL) {
7346 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7347 if (nw != NULL) {
7348 if (nw->_private == NULL)
7349 nw->_private = cur->_private;
7350 if (firstChild == NULL){
7351 firstChild = nw;
7352 }
7353 nw = xmlAddChild(ctxt->node, nw);
7354 }
7355 if (cur == ent->last) {
7356 /*
7357 * needed to detect some strange empty
7358 * node cases in the reader tests
7359 */
7360 if ((ctxt->parseMode == XML_PARSE_READER) &&
7361 (nw != NULL) &&
7362 (nw->type == XML_ELEMENT_NODE) &&
7363 (nw->children == NULL))
7364 nw->extra = 1;
7365
7366 break;
7367 }
7368 cur = cur->next;
7369 }
7370 #ifdef LIBXML_LEGACY_ENABLED
7371 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7372 xmlAddEntityReference(ent, firstChild, nw);
7373 #endif /* LIBXML_LEGACY_ENABLED */
7374 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7375 xmlNodePtr nw = NULL, cur, next, last,
7376 firstChild = NULL;
7377
7378 /*
7379 * We are copying here, make sure there is no abuse
7380 */
7381 ctxt->sizeentcopy += ent->length + 5;
7382 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7383 return;
7384
7385 /*
7386 * Copy the entity child list and make it the new
7387 * entity child list. The goal is to make sure any
7388 * ID or REF referenced will be the one from the
7389 * document content and not the entity copy.
7390 */
7391 cur = ent->children;
7392 ent->children = NULL;
7393 last = ent->last;
7394 ent->last = NULL;
7395 while (cur != NULL) {
7396 next = cur->next;
7397 cur->next = NULL;
7398 cur->parent = NULL;
7399 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7400 if (nw != NULL) {
7401 if (nw->_private == NULL)
7402 nw->_private = cur->_private;
7403 if (firstChild == NULL){
7404 firstChild = cur;
7405 }
7406 xmlAddChild((xmlNodePtr) ent, nw);
7407 xmlAddChild(ctxt->node, cur);
7408 }
7409 if (cur == last)
7410 break;
7411 cur = next;
7412 }
7413 if (ent->owner == 0)
7414 ent->owner = 1;
7415 #ifdef LIBXML_LEGACY_ENABLED
7416 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7417 xmlAddEntityReference(ent, firstChild, nw);
7418 #endif /* LIBXML_LEGACY_ENABLED */
7419 } else {
7420 const xmlChar *nbktext;
7421
7422 /*
7423 * the name change is to avoid coalescing of the
7424 * node with a possible previous text one which
7425 * would make ent->children a dangling pointer
7426 */
7427 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7428 -1);
7429 if (ent->children->type == XML_TEXT_NODE)
7430 ent->children->name = nbktext;
7431 if ((ent->last != ent->children) &&
7432 (ent->last->type == XML_TEXT_NODE))
7433 ent->last->name = nbktext;
7434 xmlAddChildList(ctxt->node, ent->children);
7435 }
7436
7437 /*
7438 * This is to avoid a nasty side effect, see
7439 * characters() in SAX.c
7440 */
7441 ctxt->nodemem = 0;
7442 ctxt->nodelen = 0;
7443 return;
7444 }
7445 }
7446 }
7447
7448 /**
7449 * xmlParseEntityRef:
7450 * @ctxt: an XML parser context
7451 *
7452 * parse ENTITY references declarations
7453 *
7454 * [68] EntityRef ::= '&' Name ';'
7455 *
7456 * [ WFC: Entity Declared ]
7457 * In a document without any DTD, a document with only an internal DTD
7458 * subset which contains no parameter entity references, or a document
7459 * with "standalone='yes'", the Name given in the entity reference
7460 * must match that in an entity declaration, except that well-formed
7461 * documents need not declare any of the following entities: amp, lt,
7462 * gt, apos, quot. The declaration of a parameter entity must precede
7463 * any reference to it. Similarly, the declaration of a general entity
7464 * must precede any reference to it which appears in a default value in an
7465 * attribute-list declaration. Note that if entities are declared in the
7466 * external subset or in external parameter entities, a non-validating
7467 * processor is not obligated to read and process their declarations;
7468 * for such documents, the rule that an entity must be declared is a
7469 * well-formedness constraint only if standalone='yes'.
7470 *
7471 * [ WFC: Parsed Entity ]
7472 * An entity reference must not contain the name of an unparsed entity
7473 *
7474 * Returns the xmlEntityPtr if found, or NULL otherwise.
7475 */
7476 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7477 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7478 const xmlChar *name;
7479 xmlEntityPtr ent = NULL;
7480
7481 GROW;
7482 if (ctxt->instate == XML_PARSER_EOF)
7483 return(NULL);
7484
7485 if (RAW != '&')
7486 return(NULL);
7487 NEXT;
7488 name = xmlParseName(ctxt);
7489 if (name == NULL) {
7490 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7491 "xmlParseEntityRef: no name\n");
7492 return(NULL);
7493 }
7494 if (RAW != ';') {
7495 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7496 return(NULL);
7497 }
7498 NEXT;
7499
7500 /*
7501 * Predefined entities override any extra definition
7502 */
7503 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7504 ent = xmlGetPredefinedEntity(name);
7505 if (ent != NULL)
7506 return(ent);
7507 }
7508
7509 /*
7510 * Increase the number of entity references parsed
7511 */
7512 ctxt->nbentities++;
7513
7514 /*
7515 * Ask first SAX for entity resolution, otherwise try the
7516 * entities which may have stored in the parser context.
7517 */
7518 if (ctxt->sax != NULL) {
7519 if (ctxt->sax->getEntity != NULL)
7520 ent = ctxt->sax->getEntity(ctxt->userData, name);
7521 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7522 (ctxt->options & XML_PARSE_OLDSAX))
7523 ent = xmlGetPredefinedEntity(name);
7524 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7525 (ctxt->userData==ctxt)) {
7526 ent = xmlSAX2GetEntity(ctxt, name);
7527 }
7528 }
7529 if (ctxt->instate == XML_PARSER_EOF)
7530 return(NULL);
7531 /*
7532 * [ WFC: Entity Declared ]
7533 * In a document without any DTD, a document with only an
7534 * internal DTD subset which contains no parameter entity
7535 * references, or a document with "standalone='yes'", the
7536 * Name given in the entity reference must match that in an
7537 * entity declaration, except that well-formed documents
7538 * need not declare any of the following entities: amp, lt,
7539 * gt, apos, quot.
7540 * The declaration of a parameter entity must precede any
7541 * reference to it.
7542 * Similarly, the declaration of a general entity must
7543 * precede any reference to it which appears in a default
7544 * value in an attribute-list declaration. Note that if
7545 * entities are declared in the external subset or in
7546 * external parameter entities, a non-validating processor
7547 * is not obligated to read and process their declarations;
7548 * for such documents, the rule that an entity must be
7549 * declared is a well-formedness constraint only if
7550 * standalone='yes'.
7551 */
7552 if (ent == NULL) {
7553 if ((ctxt->standalone == 1) ||
7554 ((ctxt->hasExternalSubset == 0) &&
7555 (ctxt->hasPErefs == 0))) {
7556 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7557 "Entity '%s' not defined\n", name);
7558 } else {
7559 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7560 "Entity '%s' not defined\n", name);
7561 if ((ctxt->inSubset == 0) &&
7562 (ctxt->sax != NULL) &&
7563 (ctxt->sax->reference != NULL)) {
7564 ctxt->sax->reference(ctxt->userData, name);
7565 }
7566 }
7567 xmlParserEntityCheck(ctxt, 0, ent, 0);
7568 ctxt->valid = 0;
7569 }
7570
7571 /*
7572 * [ WFC: Parsed Entity ]
7573 * An entity reference must not contain the name of an
7574 * unparsed entity
7575 */
7576 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7578 "Entity reference to unparsed entity %s\n", name);
7579 }
7580
7581 /*
7582 * [ WFC: No External Entity References ]
7583 * Attribute values cannot contain direct or indirect
7584 * entity references to external entities.
7585 */
7586 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7587 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7589 "Attribute references external entity '%s'\n", name);
7590 }
7591 /*
7592 * [ WFC: No < in Attribute Values ]
7593 * The replacement text of any entity referred to directly or
7594 * indirectly in an attribute value (other than "<") must
7595 * not contain a <.
7596 */
7597 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7598 (ent != NULL) &&
7599 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7600 if (((ent->checked & 1) || (ent->checked == 0)) &&
7601 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7602 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7603 "'<' in entity '%s' is not allowed in attributes values\n", name);
7604 }
7605 }
7606
7607 /*
7608 * Internal check, no parameter entities here ...
7609 */
7610 else {
7611 switch (ent->etype) {
7612 case XML_INTERNAL_PARAMETER_ENTITY:
7613 case XML_EXTERNAL_PARAMETER_ENTITY:
7614 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7615 "Attempt to reference the parameter entity '%s'\n",
7616 name);
7617 break;
7618 default:
7619 break;
7620 }
7621 }
7622
7623 /*
7624 * [ WFC: No Recursion ]
7625 * A parsed entity must not contain a recursive reference
7626 * to itself, either directly or indirectly.
7627 * Done somewhere else
7628 */
7629 return(ent);
7630 }
7631
7632 /**
7633 * xmlParseStringEntityRef:
7634 * @ctxt: an XML parser context
7635 * @str: a pointer to an index in the string
7636 *
7637 * parse ENTITY references declarations, but this version parses it from
7638 * a string value.
7639 *
7640 * [68] EntityRef ::= '&' Name ';'
7641 *
7642 * [ WFC: Entity Declared ]
7643 * In a document without any DTD, a document with only an internal DTD
7644 * subset which contains no parameter entity references, or a document
7645 * with "standalone='yes'", the Name given in the entity reference
7646 * must match that in an entity declaration, except that well-formed
7647 * documents need not declare any of the following entities: amp, lt,
7648 * gt, apos, quot. The declaration of a parameter entity must precede
7649 * any reference to it. Similarly, the declaration of a general entity
7650 * must precede any reference to it which appears in a default value in an
7651 * attribute-list declaration. Note that if entities are declared in the
7652 * external subset or in external parameter entities, a non-validating
7653 * processor is not obligated to read and process their declarations;
7654 * for such documents, the rule that an entity must be declared is a
7655 * well-formedness constraint only if standalone='yes'.
7656 *
7657 * [ WFC: Parsed Entity ]
7658 * An entity reference must not contain the name of an unparsed entity
7659 *
7660 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7661 * is updated to the current location in the string.
7662 */
7663 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7664 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7665 xmlChar *name;
7666 const xmlChar *ptr;
7667 xmlChar cur;
7668 xmlEntityPtr ent = NULL;
7669
7670 if ((str == NULL) || (*str == NULL))
7671 return(NULL);
7672 ptr = *str;
7673 cur = *ptr;
7674 if (cur != '&')
7675 return(NULL);
7676
7677 ptr++;
7678 name = xmlParseStringName(ctxt, &ptr);
7679 if (name == NULL) {
7680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 "xmlParseStringEntityRef: no name\n");
7682 *str = ptr;
7683 return(NULL);
7684 }
7685 if (*ptr != ';') {
7686 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7687 xmlFree(name);
7688 *str = ptr;
7689 return(NULL);
7690 }
7691 ptr++;
7692
7693
7694 /*
7695 * Predefined entities override any extra definition
7696 */
7697 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7698 ent = xmlGetPredefinedEntity(name);
7699 if (ent != NULL) {
7700 xmlFree(name);
7701 *str = ptr;
7702 return(ent);
7703 }
7704 }
7705
7706 /*
7707 * Increate the number of entity references parsed
7708 */
7709 ctxt->nbentities++;
7710
7711 /*
7712 * Ask first SAX for entity resolution, otherwise try the
7713 * entities which may have stored in the parser context.
7714 */
7715 if (ctxt->sax != NULL) {
7716 if (ctxt->sax->getEntity != NULL)
7717 ent = ctxt->sax->getEntity(ctxt->userData, name);
7718 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7719 ent = xmlGetPredefinedEntity(name);
7720 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7721 ent = xmlSAX2GetEntity(ctxt, name);
7722 }
7723 }
7724 if (ctxt->instate == XML_PARSER_EOF) {
7725 xmlFree(name);
7726 return(NULL);
7727 }
7728
7729 /*
7730 * [ WFC: Entity Declared ]
7731 * In a document without any DTD, a document with only an
7732 * internal DTD subset which contains no parameter entity
7733 * references, or a document with "standalone='yes'", the
7734 * Name given in the entity reference must match that in an
7735 * entity declaration, except that well-formed documents
7736 * need not declare any of the following entities: amp, lt,
7737 * gt, apos, quot.
7738 * The declaration of a parameter entity must precede any
7739 * reference to it.
7740 * Similarly, the declaration of a general entity must
7741 * precede any reference to it which appears in a default
7742 * value in an attribute-list declaration. Note that if
7743 * entities are declared in the external subset or in
7744 * external parameter entities, a non-validating processor
7745 * is not obligated to read and process their declarations;
7746 * for such documents, the rule that an entity must be
7747 * declared is a well-formedness constraint only if
7748 * standalone='yes'.
7749 */
7750 if (ent == NULL) {
7751 if ((ctxt->standalone == 1) ||
7752 ((ctxt->hasExternalSubset == 0) &&
7753 (ctxt->hasPErefs == 0))) {
7754 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7755 "Entity '%s' not defined\n", name);
7756 } else {
7757 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7758 "Entity '%s' not defined\n",
7759 name);
7760 }
7761 xmlParserEntityCheck(ctxt, 0, ent, 0);
7762 /* TODO ? check regressions ctxt->valid = 0; */
7763 }
7764
7765 /*
7766 * [ WFC: Parsed Entity ]
7767 * An entity reference must not contain the name of an
7768 * unparsed entity
7769 */
7770 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7771 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7772 "Entity reference to unparsed entity %s\n", name);
7773 }
7774
7775 /*
7776 * [ WFC: No External Entity References ]
7777 * Attribute values cannot contain direct or indirect
7778 * entity references to external entities.
7779 */
7780 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7781 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7782 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7783 "Attribute references external entity '%s'\n", name);
7784 }
7785 /*
7786 * [ WFC: No < in Attribute Values ]
7787 * The replacement text of any entity referred to directly or
7788 * indirectly in an attribute value (other than "<") must
7789 * not contain a <.
7790 */
7791 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7792 (ent != NULL) && (ent->content != NULL) &&
7793 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7794 (xmlStrchr(ent->content, '<'))) {
7795 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7796 "'<' in entity '%s' is not allowed in attributes values\n",
7797 name);
7798 }
7799
7800 /*
7801 * Internal check, no parameter entities here ...
7802 */
7803 else {
7804 switch (ent->etype) {
7805 case XML_INTERNAL_PARAMETER_ENTITY:
7806 case XML_EXTERNAL_PARAMETER_ENTITY:
7807 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7808 "Attempt to reference the parameter entity '%s'\n",
7809 name);
7810 break;
7811 default:
7812 break;
7813 }
7814 }
7815
7816 /*
7817 * [ WFC: No Recursion ]
7818 * A parsed entity must not contain a recursive reference
7819 * to itself, either directly or indirectly.
7820 * Done somewhere else
7821 */
7822
7823 xmlFree(name);
7824 *str = ptr;
7825 return(ent);
7826 }
7827
7828 /**
7829 * xmlParsePEReference:
7830 * @ctxt: an XML parser context
7831 *
7832 * parse PEReference declarations
7833 * The entity content is handled directly by pushing it's content as
7834 * a new input stream.
7835 *
7836 * [69] PEReference ::= '%' Name ';'
7837 *
7838 * [ WFC: No Recursion ]
7839 * A parsed entity must not contain a recursive
7840 * reference to itself, either directly or indirectly.
7841 *
7842 * [ WFC: Entity Declared ]
7843 * In a document without any DTD, a document with only an internal DTD
7844 * subset which contains no parameter entity references, or a document
7845 * with "standalone='yes'", ... ... The declaration of a parameter
7846 * entity must precede any reference to it...
7847 *
7848 * [ VC: Entity Declared ]
7849 * In a document with an external subset or external parameter entities
7850 * with "standalone='no'", ... ... The declaration of a parameter entity
7851 * must precede any reference to it...
7852 *
7853 * [ WFC: In DTD ]
7854 * Parameter-entity references may only appear in the DTD.
7855 * NOTE: misleading but this is handled.
7856 */
7857 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7858 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7859 {
7860 const xmlChar *name;
7861 xmlEntityPtr entity = NULL;
7862 xmlParserInputPtr input;
7863
7864 if (RAW != '%')
7865 return;
7866 NEXT;
7867 name = xmlParseName(ctxt);
7868 if (name == NULL) {
7869 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7870 return;
7871 }
7872 if (xmlParserDebugEntities)
7873 xmlGenericError(xmlGenericErrorContext,
7874 "PEReference: %s\n", name);
7875 if (RAW != ';') {
7876 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7877 return;
7878 }
7879
7880 NEXT;
7881
7882 /*
7883 * Increate the number of entity references parsed
7884 */
7885 ctxt->nbentities++;
7886
7887 /*
7888 * Request the entity from SAX
7889 */
7890 if ((ctxt->sax != NULL) &&
7891 (ctxt->sax->getParameterEntity != NULL))
7892 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7893 if (ctxt->instate == XML_PARSER_EOF)
7894 return;
7895 if (entity == NULL) {
7896 /*
7897 * [ WFC: Entity Declared ]
7898 * In a document without any DTD, a document with only an
7899 * internal DTD subset which contains no parameter entity
7900 * references, or a document with "standalone='yes'", ...
7901 * ... The declaration of a parameter entity must precede
7902 * any reference to it...
7903 */
7904 if ((ctxt->standalone == 1) ||
7905 ((ctxt->hasExternalSubset == 0) &&
7906 (ctxt->hasPErefs == 0))) {
7907 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7908 "PEReference: %%%s; not found\n",
7909 name);
7910 } else {
7911 /*
7912 * [ VC: Entity Declared ]
7913 * In a document with an external subset or external
7914 * parameter entities with "standalone='no'", ...
7915 * ... The declaration of a parameter entity must
7916 * precede any reference to it...
7917 */
7918 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7919 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 "PEReference: %%%s; not found\n",
7921 name, NULL);
7922 } else
7923 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7924 "PEReference: %%%s; not found\n",
7925 name, NULL);
7926 ctxt->valid = 0;
7927 }
7928 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7929 } else {
7930 /*
7931 * Internal checking in case the entity quest barfed
7932 */
7933 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7934 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7935 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7936 "Internal: %%%s; is not a parameter entity\n",
7937 name, NULL);
7938 } else {
7939 xmlChar start[4];
7940 xmlCharEncoding enc;
7941
7942 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7943 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7944 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7945 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7946 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7947 (ctxt->replaceEntities == 0) &&
7948 (ctxt->validate == 0))
7949 return;
7950
7951 input = xmlNewEntityInputStream(ctxt, entity);
7952 if (xmlPushInput(ctxt, input) < 0) {
7953 xmlFreeInputStream(input);
7954 return;
7955 }
7956
7957 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7958 /*
7959 * Get the 4 first bytes and decode the charset
7960 * if enc != XML_CHAR_ENCODING_NONE
7961 * plug some encoding conversion routines.
7962 * Note that, since we may have some non-UTF8
7963 * encoding (like UTF16, bug 135229), the 'length'
7964 * is not known, but we can calculate based upon
7965 * the amount of data in the buffer.
7966 */
7967 GROW
7968 if (ctxt->instate == XML_PARSER_EOF)
7969 return;
7970 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7971 start[0] = RAW;
7972 start[1] = NXT(1);
7973 start[2] = NXT(2);
7974 start[3] = NXT(3);
7975 enc = xmlDetectCharEncoding(start, 4);
7976 if (enc != XML_CHAR_ENCODING_NONE) {
7977 xmlSwitchEncoding(ctxt, enc);
7978 }
7979 }
7980
7981 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7982 (IS_BLANK_CH(NXT(5)))) {
7983 xmlParseTextDecl(ctxt);
7984 }
7985 }
7986 }
7987 }
7988 ctxt->hasPErefs = 1;
7989 }
7990
7991 /**
7992 * xmlLoadEntityContent:
7993 * @ctxt: an XML parser context
7994 * @entity: an unloaded system entity
7995 *
7996 * Load the original content of the given system entity from the
7997 * ExternalID/SystemID given. This is to be used for Included in Literal
7998 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7999 *
8000 * Returns 0 in case of success and -1 in case of failure
8001 */
8002 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8003 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8004 xmlParserInputPtr input;
8005 xmlBufferPtr buf;
8006 int l, c;
8007 int count = 0;
8008
8009 if ((ctxt == NULL) || (entity == NULL) ||
8010 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8011 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8012 (entity->content != NULL)) {
8013 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8014 "xmlLoadEntityContent parameter error");
8015 return(-1);
8016 }
8017
8018 if (xmlParserDebugEntities)
8019 xmlGenericError(xmlGenericErrorContext,
8020 "Reading %s entity content input\n", entity->name);
8021
8022 buf = xmlBufferCreate();
8023 if (buf == NULL) {
8024 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8025 "xmlLoadEntityContent parameter error");
8026 return(-1);
8027 }
8028
8029 input = xmlNewEntityInputStream(ctxt, entity);
8030 if (input == NULL) {
8031 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8032 "xmlLoadEntityContent input error");
8033 xmlBufferFree(buf);
8034 return(-1);
8035 }
8036
8037 /*
8038 * Push the entity as the current input, read char by char
8039 * saving to the buffer until the end of the entity or an error
8040 */
8041 if (xmlPushInput(ctxt, input) < 0) {
8042 xmlBufferFree(buf);
8043 return(-1);
8044 }
8045
8046 GROW;
8047 c = CUR_CHAR(l);
8048 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8049 (IS_CHAR(c))) {
8050 xmlBufferAdd(buf, ctxt->input->cur, l);
8051 if (count++ > XML_PARSER_CHUNK_SIZE) {
8052 count = 0;
8053 GROW;
8054 if (ctxt->instate == XML_PARSER_EOF) {
8055 xmlBufferFree(buf);
8056 return(-1);
8057 }
8058 }
8059 NEXTL(l);
8060 c = CUR_CHAR(l);
8061 if (c == 0) {
8062 count = 0;
8063 GROW;
8064 if (ctxt->instate == XML_PARSER_EOF) {
8065 xmlBufferFree(buf);
8066 return(-1);
8067 }
8068 c = CUR_CHAR(l);
8069 }
8070 }
8071
8072 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8073 xmlPopInput(ctxt);
8074 } else if (!IS_CHAR(c)) {
8075 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8076 "xmlLoadEntityContent: invalid char value %d\n",
8077 c);
8078 xmlBufferFree(buf);
8079 return(-1);
8080 }
8081 entity->content = buf->content;
8082 buf->content = NULL;
8083 xmlBufferFree(buf);
8084
8085 return(0);
8086 }
8087
8088 /**
8089 * xmlParseStringPEReference:
8090 * @ctxt: an XML parser context
8091 * @str: a pointer to an index in the string
8092 *
8093 * parse PEReference declarations
8094 *
8095 * [69] PEReference ::= '%' Name ';'
8096 *
8097 * [ WFC: No Recursion ]
8098 * A parsed entity must not contain a recursive
8099 * reference to itself, either directly or indirectly.
8100 *
8101 * [ WFC: Entity Declared ]
8102 * In a document without any DTD, a document with only an internal DTD
8103 * subset which contains no parameter entity references, or a document
8104 * with "standalone='yes'", ... ... The declaration of a parameter
8105 * entity must precede any reference to it...
8106 *
8107 * [ VC: Entity Declared ]
8108 * In a document with an external subset or external parameter entities
8109 * with "standalone='no'", ... ... The declaration of a parameter entity
8110 * must precede any reference to it...
8111 *
8112 * [ WFC: In DTD ]
8113 * Parameter-entity references may only appear in the DTD.
8114 * NOTE: misleading but this is handled.
8115 *
8116 * Returns the string of the entity content.
8117 * str is updated to the current value of the index
8118 */
8119 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8120 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8121 const xmlChar *ptr;
8122 xmlChar cur;
8123 xmlChar *name;
8124 xmlEntityPtr entity = NULL;
8125
8126 if ((str == NULL) || (*str == NULL)) return(NULL);
8127 ptr = *str;
8128 cur = *ptr;
8129 if (cur != '%')
8130 return(NULL);
8131 ptr++;
8132 name = xmlParseStringName(ctxt, &ptr);
8133 if (name == NULL) {
8134 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8135 "xmlParseStringPEReference: no name\n");
8136 *str = ptr;
8137 return(NULL);
8138 }
8139 cur = *ptr;
8140 if (cur != ';') {
8141 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8142 xmlFree(name);
8143 *str = ptr;
8144 return(NULL);
8145 }
8146 ptr++;
8147
8148 /*
8149 * Increate the number of entity references parsed
8150 */
8151 ctxt->nbentities++;
8152
8153 /*
8154 * Request the entity from SAX
8155 */
8156 if ((ctxt->sax != NULL) &&
8157 (ctxt->sax->getParameterEntity != NULL))
8158 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8159 if (ctxt->instate == XML_PARSER_EOF) {
8160 xmlFree(name);
8161 *str = ptr;
8162 return(NULL);
8163 }
8164 if (entity == NULL) {
8165 /*
8166 * [ WFC: Entity Declared ]
8167 * In a document without any DTD, a document with only an
8168 * internal DTD subset which contains no parameter entity
8169 * references, or a document with "standalone='yes'", ...
8170 * ... The declaration of a parameter entity must precede
8171 * any reference to it...
8172 */
8173 if ((ctxt->standalone == 1) ||
8174 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8175 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8176 "PEReference: %%%s; not found\n", name);
8177 } else {
8178 /*
8179 * [ VC: Entity Declared ]
8180 * In a document with an external subset or external
8181 * parameter entities with "standalone='no'", ...
8182 * ... The declaration of a parameter entity must
8183 * precede any reference to it...
8184 */
8185 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8186 "PEReference: %%%s; not found\n",
8187 name, NULL);
8188 ctxt->valid = 0;
8189 }
8190 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8191 } else {
8192 /*
8193 * Internal checking in case the entity quest barfed
8194 */
8195 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8196 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8197 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8198 "%%%s; is not a parameter entity\n",
8199 name, NULL);
8200 }
8201 }
8202 ctxt->hasPErefs = 1;
8203 xmlFree(name);
8204 *str = ptr;
8205 return(entity);
8206 }
8207
8208 /**
8209 * xmlParseDocTypeDecl:
8210 * @ctxt: an XML parser context
8211 *
8212 * parse a DOCTYPE declaration
8213 *
8214 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8215 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8216 *
8217 * [ VC: Root Element Type ]
8218 * The Name in the document type declaration must match the element
8219 * type of the root element.
8220 */
8221
8222 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8223 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8224 const xmlChar *name = NULL;
8225 xmlChar *ExternalID = NULL;
8226 xmlChar *URI = NULL;
8227
8228 /*
8229 * We know that '<!DOCTYPE' has been detected.
8230 */
8231 SKIP(9);
8232
8233 SKIP_BLANKS;
8234
8235 /*
8236 * Parse the DOCTYPE name.
8237 */
8238 name = xmlParseName(ctxt);
8239 if (name == NULL) {
8240 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8241 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8242 }
8243 ctxt->intSubName = name;
8244
8245 SKIP_BLANKS;
8246
8247 /*
8248 * Check for SystemID and ExternalID
8249 */
8250 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8251
8252 if ((URI != NULL) || (ExternalID != NULL)) {
8253 ctxt->hasExternalSubset = 1;
8254 }
8255 ctxt->extSubURI = URI;
8256 ctxt->extSubSystem = ExternalID;
8257
8258 SKIP_BLANKS;
8259
8260 /*
8261 * Create and update the internal subset.
8262 */
8263 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8264 (!ctxt->disableSAX))
8265 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8266 if (ctxt->instate == XML_PARSER_EOF)
8267 return;
8268
8269 /*
8270 * Is there any internal subset declarations ?
8271 * they are handled separately in xmlParseInternalSubset()
8272 */
8273 if (RAW == '[')
8274 return;
8275
8276 /*
8277 * We should be at the end of the DOCTYPE declaration.
8278 */
8279 if (RAW != '>') {
8280 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8281 }
8282 NEXT;
8283 }
8284
8285 /**
8286 * xmlParseInternalSubset:
8287 * @ctxt: an XML parser context
8288 *
8289 * parse the internal subset declaration
8290 *
8291 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8292 */
8293
8294 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8295 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8296 /*
8297 * Is there any DTD definition ?
8298 */
8299 if (RAW == '[') {
8300 int baseInputNr = ctxt->inputNr;
8301 ctxt->instate = XML_PARSER_DTD;
8302 NEXT;
8303 /*
8304 * Parse the succession of Markup declarations and
8305 * PEReferences.
8306 * Subsequence (markupdecl | PEReference | S)*
8307 */
8308 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8309 (ctxt->instate != XML_PARSER_EOF)) {
8310 const xmlChar *check = CUR_PTR;
8311 unsigned int cons = ctxt->input->consumed;
8312
8313 SKIP_BLANKS;
8314 xmlParseMarkupDecl(ctxt);
8315 xmlParsePEReference(ctxt);
8316
8317 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8318 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8319 "xmlParseInternalSubset: error detected in Markup declaration\n");
8320 if (ctxt->inputNr > baseInputNr)
8321 xmlPopInput(ctxt);
8322 else
8323 break;
8324 }
8325 }
8326 if (RAW == ']') {
8327 NEXT;
8328 SKIP_BLANKS;
8329 }
8330 }
8331
8332 /*
8333 * We should be at the end of the DOCTYPE declaration.
8334 */
8335 if (RAW != '>') {
8336 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8337 return;
8338 }
8339 NEXT;
8340 }
8341
8342 #ifdef LIBXML_SAX1_ENABLED
8343 /**
8344 * xmlParseAttribute:
8345 * @ctxt: an XML parser context
8346 * @value: a xmlChar ** used to store the value of the attribute
8347 *
8348 * parse an attribute
8349 *
8350 * [41] Attribute ::= Name Eq AttValue
8351 *
8352 * [ WFC: No External Entity References ]
8353 * Attribute values cannot contain direct or indirect entity references
8354 * to external entities.
8355 *
8356 * [ WFC: No < in Attribute Values ]
8357 * The replacement text of any entity referred to directly or indirectly in
8358 * an attribute value (other than "<") must not contain a <.
8359 *
8360 * [ VC: Attribute Value Type ]
8361 * The attribute must have been declared; the value must be of the type
8362 * declared for it.
8363 *
8364 * [25] Eq ::= S? '=' S?
8365 *
8366 * With namespace:
8367 *
8368 * [NS 11] Attribute ::= QName Eq AttValue
8369 *
8370 * Also the case QName == xmlns:??? is handled independently as a namespace
8371 * definition.
8372 *
8373 * Returns the attribute name, and the value in *value.
8374 */
8375
8376 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8377 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8378 const xmlChar *name;
8379 xmlChar *val;
8380
8381 *value = NULL;
8382 GROW;
8383 name = xmlParseName(ctxt);
8384 if (name == NULL) {
8385 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8386 "error parsing attribute name\n");
8387 return(NULL);
8388 }
8389
8390 /*
8391 * read the value
8392 */
8393 SKIP_BLANKS;
8394 if (RAW == '=') {
8395 NEXT;
8396 SKIP_BLANKS;
8397 val = xmlParseAttValue(ctxt);
8398 ctxt->instate = XML_PARSER_CONTENT;
8399 } else {
8400 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8401 "Specification mandates value for attribute %s\n", name);
8402 return(NULL);
8403 }
8404
8405 /*
8406 * Check that xml:lang conforms to the specification
8407 * No more registered as an error, just generate a warning now
8408 * since this was deprecated in XML second edition
8409 */
8410 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8411 if (!xmlCheckLanguageID(val)) {
8412 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8413 "Malformed value for xml:lang : %s\n",
8414 val, NULL);
8415 }
8416 }
8417
8418 /*
8419 * Check that xml:space conforms to the specification
8420 */
8421 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8422 if (xmlStrEqual(val, BAD_CAST "default"))
8423 *(ctxt->space) = 0;
8424 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8425 *(ctxt->space) = 1;
8426 else {
8427 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8428 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8429 val, NULL);
8430 }
8431 }
8432
8433 *value = val;
8434 return(name);
8435 }
8436
8437 /**
8438 * xmlParseStartTag:
8439 * @ctxt: an XML parser context
8440 *
8441 * parse a start of tag either for rule element or
8442 * EmptyElement. In both case we don't parse the tag closing chars.
8443 *
8444 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8445 *
8446 * [ WFC: Unique Att Spec ]
8447 * No attribute name may appear more than once in the same start-tag or
8448 * empty-element tag.
8449 *
8450 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8451 *
8452 * [ WFC: Unique Att Spec ]
8453 * No attribute name may appear more than once in the same start-tag or
8454 * empty-element tag.
8455 *
8456 * With namespace:
8457 *
8458 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8459 *
8460 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8461 *
8462 * Returns the element name parsed
8463 */
8464
8465 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8466 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8467 const xmlChar *name;
8468 const xmlChar *attname;
8469 xmlChar *attvalue;
8470 const xmlChar **atts = ctxt->atts;
8471 int nbatts = 0;
8472 int maxatts = ctxt->maxatts;
8473 int i;
8474
8475 if (RAW != '<') return(NULL);
8476 NEXT1;
8477
8478 name = xmlParseName(ctxt);
8479 if (name == NULL) {
8480 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8481 "xmlParseStartTag: invalid element name\n");
8482 return(NULL);
8483 }
8484
8485 /*
8486 * Now parse the attributes, it ends up with the ending
8487 *
8488 * (S Attribute)* S?
8489 */
8490 SKIP_BLANKS;
8491 GROW;
8492
8493 while (((RAW != '>') &&
8494 ((RAW != '/') || (NXT(1) != '>')) &&
8495 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8496 const xmlChar *q = CUR_PTR;
8497 unsigned int cons = ctxt->input->consumed;
8498
8499 attname = xmlParseAttribute(ctxt, &attvalue);
8500 if ((attname != NULL) && (attvalue != NULL)) {
8501 /*
8502 * [ WFC: Unique Att Spec ]
8503 * No attribute name may appear more than once in the same
8504 * start-tag or empty-element tag.
8505 */
8506 for (i = 0; i < nbatts;i += 2) {
8507 if (xmlStrEqual(atts[i], attname)) {
8508 xmlErrAttributeDup(ctxt, NULL, attname);
8509 xmlFree(attvalue);
8510 goto failed;
8511 }
8512 }
8513 /*
8514 * Add the pair to atts
8515 */
8516 if (atts == NULL) {
8517 maxatts = 22; /* allow for 10 attrs by default */
8518 atts = (const xmlChar **)
8519 xmlMalloc(maxatts * sizeof(xmlChar *));
8520 if (atts == NULL) {
8521 xmlErrMemory(ctxt, NULL);
8522 if (attvalue != NULL)
8523 xmlFree(attvalue);
8524 goto failed;
8525 }
8526 ctxt->atts = atts;
8527 ctxt->maxatts = maxatts;
8528 } else if (nbatts + 4 > maxatts) {
8529 const xmlChar **n;
8530
8531 maxatts *= 2;
8532 n = (const xmlChar **) xmlRealloc((void *) atts,
8533 maxatts * sizeof(const xmlChar *));
8534 if (n == NULL) {
8535 xmlErrMemory(ctxt, NULL);
8536 if (attvalue != NULL)
8537 xmlFree(attvalue);
8538 goto failed;
8539 }
8540 atts = n;
8541 ctxt->atts = atts;
8542 ctxt->maxatts = maxatts;
8543 }
8544 atts[nbatts++] = attname;
8545 atts[nbatts++] = attvalue;
8546 atts[nbatts] = NULL;
8547 atts[nbatts + 1] = NULL;
8548 } else {
8549 if (attvalue != NULL)
8550 xmlFree(attvalue);
8551 }
8552
8553 failed:
8554
8555 GROW
8556 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8557 break;
8558 if (SKIP_BLANKS == 0) {
8559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8560 "attributes construct error\n");
8561 }
8562 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8563 (attname == NULL) && (attvalue == NULL)) {
8564 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8565 "xmlParseStartTag: problem parsing attributes\n");
8566 break;
8567 }
8568 SHRINK;
8569 GROW;
8570 }
8571
8572 /*
8573 * SAX: Start of Element !
8574 */
8575 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8576 (!ctxt->disableSAX)) {
8577 if (nbatts > 0)
8578 ctxt->sax->startElement(ctxt->userData, name, atts);
8579 else
8580 ctxt->sax->startElement(ctxt->userData, name, NULL);
8581 }
8582
8583 if (atts != NULL) {
8584 /* Free only the content strings */
8585 for (i = 1;i < nbatts;i+=2)
8586 if (atts[i] != NULL)
8587 xmlFree((xmlChar *) atts[i]);
8588 }
8589 return(name);
8590 }
8591
8592 /**
8593 * xmlParseEndTag1:
8594 * @ctxt: an XML parser context
8595 * @line: line of the start tag
8596 * @nsNr: number of namespaces on the start tag
8597 *
8598 * parse an end of tag
8599 *
8600 * [42] ETag ::= '</' Name S? '>'
8601 *
8602 * With namespace
8603 *
8604 * [NS 9] ETag ::= '</' QName S? '>'
8605 */
8606
8607 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8608 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8609 const xmlChar *name;
8610
8611 GROW;
8612 if ((RAW != '<') || (NXT(1) != '/')) {
8613 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8614 "xmlParseEndTag: '</' not found\n");
8615 return;
8616 }
8617 SKIP(2);
8618
8619 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8620
8621 /*
8622 * We should definitely be at the ending "S? '>'" part
8623 */
8624 GROW;
8625 SKIP_BLANKS;
8626 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8627 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8628 } else
8629 NEXT1;
8630
8631 /*
8632 * [ WFC: Element Type Match ]
8633 * The Name in an element's end-tag must match the element type in the
8634 * start-tag.
8635 *
8636 */
8637 if (name != (xmlChar*)1) {
8638 if (name == NULL) name = BAD_CAST "unparseable";
8639 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8640 "Opening and ending tag mismatch: %s line %d and %s\n",
8641 ctxt->name, line, name);
8642 }
8643
8644 /*
8645 * SAX: End of Tag
8646 */
8647 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8648 (!ctxt->disableSAX))
8649 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8650
8651 namePop(ctxt);
8652 spacePop(ctxt);
8653 return;
8654 }
8655
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * Parse an end tag. This is the public entry point; it simply forwards
 * to xmlParseEndTag1() with a start-tag line number of 0.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    /* No start-tag line is known here, so 0 is passed as @line. */
    xmlParseEndTag1(ctxt, 0);
}
8673 #endif /* LIBXML_SAX1_ENABLED */
8674
8675 /************************************************************************
8676 * *
8677 * SAX 2 specific operations *
8678 * *
8679 ************************************************************************/
8680
8681 /*
8682 * xmlGetNamespace:
8683 * @ctxt: an XML parser context
8684 * @prefix: the prefix to lookup
8685 *
8686 * Lookup the namespace name for the @prefix (which ca be NULL)
8687 * The prefix must come from the @ctxt->dict dictionary
8688 *
8689 * Returns the namespace name or NULL if not bound
8690 */
8691 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8692 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8693 int i;
8694
8695 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8696 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8697 if (ctxt->nsTab[i] == prefix) {
8698 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8699 return(NULL);
8700 return(ctxt->nsTab[i + 1]);
8701 }
8702 return(NULL);
8703 }
8704
8705 /**
8706 * xmlParseQName:
8707 * @ctxt: an XML parser context
8708 * @prefix: pointer to store the prefix part
8709 *
8710 * parse an XML Namespace QName
8711 *
8712 * [6] QName ::= (Prefix ':')? LocalPart
8713 * [7] Prefix ::= NCName
8714 * [8] LocalPart ::= NCName
8715 *
8716 * Returns the Name parsed or NULL
8717 */
8718
8719 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8720 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8721 const xmlChar *l, *p;
8722
8723 GROW;
8724
8725 l = xmlParseNCName(ctxt);
8726 if (l == NULL) {
8727 if (CUR == ':') {
8728 l = xmlParseName(ctxt);
8729 if (l != NULL) {
8730 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8731 "Failed to parse QName '%s'\n", l, NULL, NULL);
8732 *prefix = NULL;
8733 return(l);
8734 }
8735 }
8736 return(NULL);
8737 }
8738 if (CUR == ':') {
8739 NEXT;
8740 p = l;
8741 l = xmlParseNCName(ctxt);
8742 if (l == NULL) {
8743 xmlChar *tmp;
8744
8745 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8746 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8747 l = xmlParseNmtoken(ctxt);
8748 if (l == NULL)
8749 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8750 else {
8751 tmp = xmlBuildQName(l, p, NULL, 0);
8752 xmlFree((char *)l);
8753 }
8754 p = xmlDictLookup(ctxt->dict, tmp, -1);
8755 if (tmp != NULL) xmlFree(tmp);
8756 *prefix = NULL;
8757 return(p);
8758 }
8759 if (CUR == ':') {
8760 xmlChar *tmp;
8761
8762 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8763 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8764 NEXT;
8765 tmp = (xmlChar *) xmlParseName(ctxt);
8766 if (tmp != NULL) {
8767 tmp = xmlBuildQName(tmp, l, NULL, 0);
8768 l = xmlDictLookup(ctxt->dict, tmp, -1);
8769 if (tmp != NULL) xmlFree(tmp);
8770 *prefix = p;
8771 return(l);
8772 }
8773 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8774 l = xmlDictLookup(ctxt->dict, tmp, -1);
8775 if (tmp != NULL) xmlFree(tmp);
8776 *prefix = p;
8777 return(l);
8778 }
8779 *prefix = p;
8780 } else
8781 *prefix = NULL;
8782 return(l);
8783 }
8784
8785 /**
8786 * xmlParseQNameAndCompare:
8787 * @ctxt: an XML parser context
8788 * @name: the localname
8789 * @prefix: the prefix, if any.
8790 *
8791 * parse an XML name and compares for match
8792 * (specialized for endtag parsing)
8793 *
8794 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8795 * and the name for mismatch
8796 */
8797
8798 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8799 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8800 xmlChar const *prefix) {
8801 const xmlChar *cmp;
8802 const xmlChar *in;
8803 const xmlChar *ret;
8804 const xmlChar *prefix2;
8805
8806 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8807
8808 GROW;
8809 in = ctxt->input->cur;
8810
8811 cmp = prefix;
8812 while (*in != 0 && *in == *cmp) {
8813 ++in;
8814 ++cmp;
8815 }
8816 if ((*cmp == 0) && (*in == ':')) {
8817 in++;
8818 cmp = name;
8819 while (*in != 0 && *in == *cmp) {
8820 ++in;
8821 ++cmp;
8822 }
8823 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8824 /* success */
8825 ctxt->input->cur = in;
8826 return((const xmlChar*) 1);
8827 }
8828 }
8829 /*
8830 * all strings coms from the dictionary, equality can be done directly
8831 */
8832 ret = xmlParseQName (ctxt, &prefix2);
8833 if ((ret == name) && (prefix == prefix2))
8834 return((const xmlChar*) 1);
8835 return ret;
8836 }
8837
8838 /**
8839 * xmlParseAttValueInternal:
8840 * @ctxt: an XML parser context
8841 * @len: attribute len result
8842 * @alloc: whether the attribute was reallocated as a new string
8843 * @normalize: if 1 then further non-CDATA normalization must be done
8844 *
8845 * parse a value for an attribute.
8846 * NOTE: if no normalization is needed, the routine will return pointers
8847 * directly from the data buffer.
8848 *
8849 * 3.3.3 Attribute-Value Normalization:
8850 * Before the value of an attribute is passed to the application or
8851 * checked for validity, the XML processor must normalize it as follows:
8852 * - a character reference is processed by appending the referenced
8853 * character to the attribute value
8854 * - an entity reference is processed by recursively processing the
8855 * replacement text of the entity
8856 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8857 * appending #x20 to the normalized value, except that only a single
8858 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8859 * parsed entity or the literal entity value of an internal parsed entity
8860 * - other characters are processed by appending them to the normalized value
8861 * If the declared value is not CDATA, then the XML processor must further
8862 * process the normalized attribute value by discarding any leading and
8863 * trailing space (#x20) characters, and by replacing sequences of space
8864 * (#x20) characters by a single space (#x20) character.
8865 * All attributes for which no declaration has been read should be treated
8866 * by a non-validating parser as if declared CDATA.
8867 *
8868 * Returns the AttValue parsed or NULL. The value has to be freed by the
8869 * caller if it was copied, this can be detected by val[*len] == 0.
8870 */
8871
8872 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8873 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8874 int normalize)
8875 {
8876 xmlChar limit = 0;
8877 const xmlChar *in = NULL, *start, *end, *last;
8878 xmlChar *ret = NULL;
8879 int line, col;
8880
8881 GROW;
8882 in = (xmlChar *) CUR_PTR;
8883 line = ctxt->input->line;
8884 col = ctxt->input->col;
8885 if (*in != '"' && *in != '\'') {
8886 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8887 return (NULL);
8888 }
8889 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8890
8891 /*
8892 * try to handle in this routine the most common case where no
8893 * allocation of a new string is required and where content is
8894 * pure ASCII.
8895 */
8896 limit = *in++;
8897 col++;
8898 end = ctxt->input->end;
8899 start = in;
8900 if (in >= end) {
8901 const xmlChar *oldbase = ctxt->input->base;
8902 GROW;
8903 if (oldbase != ctxt->input->base) {
8904 long delta = ctxt->input->base - oldbase;
8905 start = start + delta;
8906 in = in + delta;
8907 }
8908 end = ctxt->input->end;
8909 }
8910 if (normalize) {
8911 /*
8912 * Skip any leading spaces
8913 */
8914 while ((in < end) && (*in != limit) &&
8915 ((*in == 0x20) || (*in == 0x9) ||
8916 (*in == 0xA) || (*in == 0xD))) {
8917 if (*in == 0xA) {
8918 line++; col = 1;
8919 } else {
8920 col++;
8921 }
8922 in++;
8923 start = in;
8924 if (in >= end) {
8925 const xmlChar *oldbase = ctxt->input->base;
8926 GROW;
8927 if (ctxt->instate == XML_PARSER_EOF)
8928 return(NULL);
8929 if (oldbase != ctxt->input->base) {
8930 long delta = ctxt->input->base - oldbase;
8931 start = start + delta;
8932 in = in + delta;
8933 }
8934 end = ctxt->input->end;
8935 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8936 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8937 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8938 "AttValue length too long\n");
8939 return(NULL);
8940 }
8941 }
8942 }
8943 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8944 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8945 col++;
8946 if ((*in++ == 0x20) && (*in == 0x20)) break;
8947 if (in >= end) {
8948 const xmlChar *oldbase = ctxt->input->base;
8949 GROW;
8950 if (ctxt->instate == XML_PARSER_EOF)
8951 return(NULL);
8952 if (oldbase != ctxt->input->base) {
8953 long delta = ctxt->input->base - oldbase;
8954 start = start + delta;
8955 in = in + delta;
8956 }
8957 end = ctxt->input->end;
8958 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8959 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8960 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8961 "AttValue length too long\n");
8962 return(NULL);
8963 }
8964 }
8965 }
8966 last = in;
8967 /*
8968 * skip the trailing blanks
8969 */
8970 while ((last[-1] == 0x20) && (last > start)) last--;
8971 while ((in < end) && (*in != limit) &&
8972 ((*in == 0x20) || (*in == 0x9) ||
8973 (*in == 0xA) || (*in == 0xD))) {
8974 if (*in == 0xA) {
8975 line++, col = 1;
8976 } else {
8977 col++;
8978 }
8979 in++;
8980 if (in >= end) {
8981 const xmlChar *oldbase = ctxt->input->base;
8982 GROW;
8983 if (ctxt->instate == XML_PARSER_EOF)
8984 return(NULL);
8985 if (oldbase != ctxt->input->base) {
8986 long delta = ctxt->input->base - oldbase;
8987 start = start + delta;
8988 in = in + delta;
8989 last = last + delta;
8990 }
8991 end = ctxt->input->end;
8992 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8993 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8994 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8995 "AttValue length too long\n");
8996 return(NULL);
8997 }
8998 }
8999 }
9000 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9001 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9002 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9003 "AttValue length too long\n");
9004 return(NULL);
9005 }
9006 if (*in != limit) goto need_complex;
9007 } else {
9008 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9009 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9010 in++;
9011 col++;
9012 if (in >= end) {
9013 const xmlChar *oldbase = ctxt->input->base;
9014 GROW;
9015 if (ctxt->instate == XML_PARSER_EOF)
9016 return(NULL);
9017 if (oldbase != ctxt->input->base) {
9018 long delta = ctxt->input->base - oldbase;
9019 start = start + delta;
9020 in = in + delta;
9021 }
9022 end = ctxt->input->end;
9023 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9024 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9025 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9026 "AttValue length too long\n");
9027 return(NULL);
9028 }
9029 }
9030 }
9031 last = in;
9032 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9033 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9034 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9035 "AttValue length too long\n");
9036 return(NULL);
9037 }
9038 if (*in != limit) goto need_complex;
9039 }
9040 in++;
9041 col++;
9042 if (len != NULL) {
9043 *len = last - start;
9044 ret = (xmlChar *) start;
9045 } else {
9046 if (alloc) *alloc = 1;
9047 ret = xmlStrndup(start, last - start);
9048 }
9049 CUR_PTR = in;
9050 ctxt->input->line = line;
9051 ctxt->input->col = col;
9052 if (alloc) *alloc = 0;
9053 return ret;
9054 need_complex:
9055 if (alloc) *alloc = 1;
9056 return xmlParseAttValueComplex(ctxt, len, normalize);
9057 }
9058
9059 /**
9060 * xmlParseAttribute2:
9061 * @ctxt: an XML parser context
9062 * @pref: the element prefix
9063 * @elem: the element name
9064 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9065 * @value: a xmlChar ** used to store the value of the attribute
9066 * @len: an int * to save the length of the attribute
9067 * @alloc: an int * to indicate if the attribute was allocated
9068 *
9069 * parse an attribute in the new SAX2 framework.
9070 *
9071 * Returns the attribute name, and the value in *value, .
9072 */
9073
9074 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9075 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9076 const xmlChar * pref, const xmlChar * elem,
9077 const xmlChar ** prefix, xmlChar ** value,
9078 int *len, int *alloc)
9079 {
9080 const xmlChar *name;
9081 xmlChar *val, *internal_val = NULL;
9082 int normalize = 0;
9083
9084 *value = NULL;
9085 GROW;
9086 name = xmlParseQName(ctxt, prefix);
9087 if (name == NULL) {
9088 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9089 "error parsing attribute name\n");
9090 return (NULL);
9091 }
9092
9093 /*
9094 * get the type if needed
9095 */
9096 if (ctxt->attsSpecial != NULL) {
9097 int type;
9098
9099 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9100 pref, elem, *prefix, name);
9101 if (type != 0)
9102 normalize = 1;
9103 }
9104
9105 /*
9106 * read the value
9107 */
9108 SKIP_BLANKS;
9109 if (RAW == '=') {
9110 NEXT;
9111 SKIP_BLANKS;
9112 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9113 if (normalize) {
9114 /*
9115 * Sometimes a second normalisation pass for spaces is needed
9116 * but that only happens if charrefs or entities refernces
9117 * have been used in the attribute value, i.e. the attribute
9118 * value have been extracted in an allocated string already.
9119 */
9120 if (*alloc) {
9121 const xmlChar *val2;
9122
9123 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9124 if ((val2 != NULL) && (val2 != val)) {
9125 xmlFree(val);
9126 val = (xmlChar *) val2;
9127 }
9128 }
9129 }
9130 ctxt->instate = XML_PARSER_CONTENT;
9131 } else {
9132 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9133 "Specification mandates value for attribute %s\n",
9134 name);
9135 return (NULL);
9136 }
9137
9138 if (*prefix == ctxt->str_xml) {
9139 /*
9140 * Check that xml:lang conforms to the specification
9141 * No more registered as an error, just generate a warning now
9142 * since this was deprecated in XML second edition
9143 */
9144 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9145 internal_val = xmlStrndup(val, *len);
9146 if (!xmlCheckLanguageID(internal_val)) {
9147 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9148 "Malformed value for xml:lang : %s\n",
9149 internal_val, NULL);
9150 }
9151 }
9152
9153 /*
9154 * Check that xml:space conforms to the specification
9155 */
9156 if (xmlStrEqual(name, BAD_CAST "space")) {
9157 internal_val = xmlStrndup(val, *len);
9158 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9159 *(ctxt->space) = 0;
9160 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9161 *(ctxt->space) = 1;
9162 else {
9163 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9164 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9165 internal_val, NULL);
9166 }
9167 }
9168 if (internal_val) {
9169 xmlFree(internal_val);
9170 }
9171 }
9172
9173 *value = val;
9174 return (name);
9175 }
9176 /**
9177 * xmlParseStartTag2:
9178 * @ctxt: an XML parser context
9179 *
9180 * parse a start of tag either for rule element or
9181 * EmptyElement. In both case we don't parse the tag closing chars.
9182 * This routine is called when running SAX2 parsing
9183 *
9184 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9185 *
9186 * [ WFC: Unique Att Spec ]
9187 * No attribute name may appear more than once in the same start-tag or
9188 * empty-element tag.
9189 *
9190 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9191 *
9192 * [ WFC: Unique Att Spec ]
9193 * No attribute name may appear more than once in the same start-tag or
9194 * empty-element tag.
9195 *
9196 * With namespace:
9197 *
9198 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9199 *
9200 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9201 *
9202 * Returns the element name parsed
9203 */
9204
9205 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9206 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9207 const xmlChar **URI, int *tlen) {
9208 const xmlChar *localname;
9209 const xmlChar *prefix;
9210 const xmlChar *attname;
9211 const xmlChar *aprefix;
9212 const xmlChar *nsname;
9213 xmlChar *attvalue;
9214 const xmlChar **atts = ctxt->atts;
9215 int maxatts = ctxt->maxatts;
9216 int nratts, nbatts, nbdef, inputid;
9217 int i, j, nbNs, attval;
9218 unsigned long cur;
9219 int nsNr = ctxt->nsNr;
9220
9221 if (RAW != '<') return(NULL);
9222 NEXT1;
9223
9224 /*
9225 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9226 * point since the attribute values may be stored as pointers to
9227 * the buffer and calling SHRINK would destroy them !
9228 * The Shrinking is only possible once the full set of attribute
9229 * callbacks have been done.
9230 */
9231 SHRINK;
9232 cur = ctxt->input->cur - ctxt->input->base;
9233 inputid = ctxt->input->id;
9234 nbatts = 0;
9235 nratts = 0;
9236 nbdef = 0;
9237 nbNs = 0;
9238 attval = 0;
9239 /* Forget any namespaces added during an earlier parse of this element. */
9240 ctxt->nsNr = nsNr;
9241
9242 localname = xmlParseQName(ctxt, &prefix);
9243 if (localname == NULL) {
9244 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9245 "StartTag: invalid element name\n");
9246 return(NULL);
9247 }
9248 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9249
9250 /*
9251 * Now parse the attributes, it ends up with the ending
9252 *
9253 * (S Attribute)* S?
9254 */
9255 SKIP_BLANKS;
9256 GROW;
9257
9258 while (((RAW != '>') &&
9259 ((RAW != '/') || (NXT(1) != '>')) &&
9260 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9261 const xmlChar *q = CUR_PTR;
9262 unsigned int cons = ctxt->input->consumed;
9263 int len = -1, alloc = 0;
9264
9265 attname = xmlParseAttribute2(ctxt, prefix, localname,
9266 &aprefix, &attvalue, &len, &alloc);
9267 if ((attname == NULL) || (attvalue == NULL))
9268 goto next_attr;
9269 if (len < 0) len = xmlStrlen(attvalue);
9270
9271 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9272 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9273 xmlURIPtr uri;
9274
9275 if (URL == NULL) {
9276 xmlErrMemory(ctxt, "dictionary allocation failure");
9277 if ((attvalue != NULL) && (alloc != 0))
9278 xmlFree(attvalue);
9279 return(NULL);
9280 }
9281 if (*URL != 0) {
9282 uri = xmlParseURI((const char *) URL);
9283 if (uri == NULL) {
9284 xmlNsErr(ctxt, XML_WAR_NS_URI,
9285 "xmlns: '%s' is not a valid URI\n",
9286 URL, NULL, NULL);
9287 } else {
9288 if (uri->scheme == NULL) {
9289 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9290 "xmlns: URI %s is not absolute\n",
9291 URL, NULL, NULL);
9292 }
9293 xmlFreeURI(uri);
9294 }
9295 if (URL == ctxt->str_xml_ns) {
9296 if (attname != ctxt->str_xml) {
9297 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9298 "xml namespace URI cannot be the default namespace\n",
9299 NULL, NULL, NULL);
9300 }
9301 goto next_attr;
9302 }
9303 if ((len == 29) &&
9304 (xmlStrEqual(URL,
9305 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9306 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9307 "reuse of the xmlns namespace name is forbidden\n",
9308 NULL, NULL, NULL);
9309 goto next_attr;
9310 }
9311 }
9312 /*
9313 * check that it's not a defined namespace
9314 */
9315 for (j = 1;j <= nbNs;j++)
9316 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9317 break;
9318 if (j <= nbNs)
9319 xmlErrAttributeDup(ctxt, NULL, attname);
9320 else
9321 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9322
9323 } else if (aprefix == ctxt->str_xmlns) {
9324 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9325 xmlURIPtr uri;
9326
9327 if (attname == ctxt->str_xml) {
9328 if (URL != ctxt->str_xml_ns) {
9329 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330 "xml namespace prefix mapped to wrong URI\n",
9331 NULL, NULL, NULL);
9332 }
9333 /*
9334 * Do not keep a namespace definition node
9335 */
9336 goto next_attr;
9337 }
9338 if (URL == ctxt->str_xml_ns) {
9339 if (attname != ctxt->str_xml) {
9340 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9341 "xml namespace URI mapped to wrong prefix\n",
9342 NULL, NULL, NULL);
9343 }
9344 goto next_attr;
9345 }
9346 if (attname == ctxt->str_xmlns) {
9347 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348 "redefinition of the xmlns prefix is forbidden\n",
9349 NULL, NULL, NULL);
9350 goto next_attr;
9351 }
9352 if ((len == 29) &&
9353 (xmlStrEqual(URL,
9354 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9355 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356 "reuse of the xmlns namespace name is forbidden\n",
9357 NULL, NULL, NULL);
9358 goto next_attr;
9359 }
9360 if ((URL == NULL) || (URL[0] == 0)) {
9361 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9362 "xmlns:%s: Empty XML namespace is not allowed\n",
9363 attname, NULL, NULL);
9364 goto next_attr;
9365 } else {
9366 uri = xmlParseURI((const char *) URL);
9367 if (uri == NULL) {
9368 xmlNsErr(ctxt, XML_WAR_NS_URI,
9369 "xmlns:%s: '%s' is not a valid URI\n",
9370 attname, URL, NULL);
9371 } else {
9372 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9373 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9374 "xmlns:%s: URI %s is not absolute\n",
9375 attname, URL, NULL);
9376 }
9377 xmlFreeURI(uri);
9378 }
9379 }
9380
9381 /*
9382 * check that it's not a defined namespace
9383 */
9384 for (j = 1;j <= nbNs;j++)
9385 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9386 break;
9387 if (j <= nbNs)
9388 xmlErrAttributeDup(ctxt, aprefix, attname);
9389 else
9390 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9391
9392 } else {
9393 /*
9394 * Add the pair to atts
9395 */
9396 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9397 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9398 goto next_attr;
9399 }
9400 maxatts = ctxt->maxatts;
9401 atts = ctxt->atts;
9402 }
9403 ctxt->attallocs[nratts++] = alloc;
9404 atts[nbatts++] = attname;
9405 atts[nbatts++] = aprefix;
9406 /*
9407 * The namespace URI field is used temporarily to point at the
9408 * base of the current input buffer for non-alloced attributes.
9409 * When the input buffer is reallocated, all the pointers become
9410 * invalid, but they can be reconstructed later.
9411 */
9412 if (alloc)
9413 atts[nbatts++] = NULL;
9414 else
9415 atts[nbatts++] = ctxt->input->base;
9416 atts[nbatts++] = attvalue;
9417 attvalue += len;
9418 atts[nbatts++] = attvalue;
9419 /*
9420 * tag if some deallocation is needed
9421 */
9422 if (alloc != 0) attval = 1;
9423 attvalue = NULL; /* moved into atts */
9424 }
9425
9426 next_attr:
9427 if ((attvalue != NULL) && (alloc != 0)) {
9428 xmlFree(attvalue);
9429 attvalue = NULL;
9430 }
9431
9432 GROW
9433 if (ctxt->instate == XML_PARSER_EOF)
9434 break;
9435 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9436 break;
9437 if (SKIP_BLANKS == 0) {
9438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9439 "attributes construct error\n");
9440 break;
9441 }
9442 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9443 (attname == NULL) && (attvalue == NULL)) {
9444 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9445 "xmlParseStartTag: problem parsing attributes\n");
9446 break;
9447 }
9448 GROW;
9449 }
9450
9451 if (ctxt->input->id != inputid) {
9452 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9453 "Unexpected change of input\n");
9454 localname = NULL;
9455 goto done;
9456 }
9457
9458 /* Reconstruct attribute value pointers. */
9459 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9460 if (atts[i+2] != NULL) {
9461 /*
9462 * Arithmetic on dangling pointers is technically undefined
9463 * behavior, but well...
9464 */
9465 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9466 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9467 atts[i+3] += offset; /* value */
9468 atts[i+4] += offset; /* valuend */
9469 }
9470 }
9471
9472 /*
9473 * The attributes defaulting
9474 */
9475 if (ctxt->attsDefault != NULL) {
9476 xmlDefAttrsPtr defaults;
9477
9478 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9479 if (defaults != NULL) {
9480 for (i = 0;i < defaults->nbAttrs;i++) {
9481 attname = defaults->values[5 * i];
9482 aprefix = defaults->values[5 * i + 1];
9483
9484 /*
9485 * special work for namespaces defaulted defs
9486 */
9487 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9488 /*
9489 * check that it's not a defined namespace
9490 */
9491 for (j = 1;j <= nbNs;j++)
9492 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9493 break;
9494 if (j <= nbNs) continue;
9495
9496 nsname = xmlGetNamespace(ctxt, NULL);
9497 if (nsname != defaults->values[5 * i + 2]) {
9498 if (nsPush(ctxt, NULL,
9499 defaults->values[5 * i + 2]) > 0)
9500 nbNs++;
9501 }
9502 } else if (aprefix == ctxt->str_xmlns) {
9503 /*
9504 * check that it's not a defined namespace
9505 */
9506 for (j = 1;j <= nbNs;j++)
9507 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9508 break;
9509 if (j <= nbNs) continue;
9510
9511 nsname = xmlGetNamespace(ctxt, attname);
9512 if (nsname != defaults->values[2]) {
9513 if (nsPush(ctxt, attname,
9514 defaults->values[5 * i + 2]) > 0)
9515 nbNs++;
9516 }
9517 } else {
9518 /*
9519 * check that it's not a defined attribute
9520 */
9521 for (j = 0;j < nbatts;j+=5) {
9522 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9523 break;
9524 }
9525 if (j < nbatts) continue;
9526
9527 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9528 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9529 return(NULL);
9530 }
9531 maxatts = ctxt->maxatts;
9532 atts = ctxt->atts;
9533 }
9534 atts[nbatts++] = attname;
9535 atts[nbatts++] = aprefix;
9536 if (aprefix == NULL)
9537 atts[nbatts++] = NULL;
9538 else
9539 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9540 atts[nbatts++] = defaults->values[5 * i + 2];
9541 atts[nbatts++] = defaults->values[5 * i + 3];
9542 if ((ctxt->standalone == 1) &&
9543 (defaults->values[5 * i + 4] != NULL)) {
9544 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9545 "standalone: attribute %s on %s defaulted from external subset\n",
9546 attname, localname);
9547 }
9548 nbdef++;
9549 }
9550 }
9551 }
9552 }
9553
9554 /*
9555 * The attributes checkings
9556 */
9557 for (i = 0; i < nbatts;i += 5) {
9558 /*
9559 * The default namespace does not apply to attribute names.
9560 */
9561 if (atts[i + 1] != NULL) {
9562 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9563 if (nsname == NULL) {
9564 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9565 "Namespace prefix %s for %s on %s is not defined\n",
9566 atts[i + 1], atts[i], localname);
9567 }
9568 atts[i + 2] = nsname;
9569 } else
9570 nsname = NULL;
9571 /*
9572 * [ WFC: Unique Att Spec ]
9573 * No attribute name may appear more than once in the same
9574 * start-tag or empty-element tag.
9575 * As extended by the Namespace in XML REC.
9576 */
9577 for (j = 0; j < i;j += 5) {
9578 if (atts[i] == atts[j]) {
9579 if (atts[i+1] == atts[j+1]) {
9580 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9581 break;
9582 }
9583 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9584 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9585 "Namespaced Attribute %s in '%s' redefined\n",
9586 atts[i], nsname, NULL);
9587 break;
9588 }
9589 }
9590 }
9591 }
9592
9593 nsname = xmlGetNamespace(ctxt, prefix);
9594 if ((prefix != NULL) && (nsname == NULL)) {
9595 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9596 "Namespace prefix %s on %s is not defined\n",
9597 prefix, localname, NULL);
9598 }
9599 *pref = prefix;
9600 *URI = nsname;
9601
9602 /*
9603 * SAX: Start of Element !
9604 */
9605 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9606 (!ctxt->disableSAX)) {
9607 if (nbNs > 0)
9608 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9610 nbatts / 5, nbdef, atts);
9611 else
9612 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9613 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9614 }
9615
9616 done:
9617 /*
9618 * Free up attribute allocated strings if needed
9619 */
9620 if (attval != 0) {
9621 for (i = 3,j = 0; j < nratts;i += 5,j++)
9622 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9623 xmlFree((xmlChar *) atts[i]);
9624 }
9625
9626 return(localname);
9627 }
9628
9629 /**
9630 * xmlParseEndTag2:
9631 * @ctxt: an XML parser context
9632 * @line: line of the start tag
9633 * @nsNr: number of namespaces on the start tag
9634 *
9635 * parse an end of tag
9636 *
9637 * [42] ETag ::= '</' Name S? '>'
9638 *
9639 * With namespace
9640 *
9641 * [NS 9] ETag ::= '</' QName S? '>'
9642 */
9643
9644 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9645 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9646 const xmlChar *URI, int line, int nsNr, int tlen) {
9647 const xmlChar *name;
9648 size_t curLength;
9649
9650 GROW;
9651 if ((RAW != '<') || (NXT(1) != '/')) {
9652 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9653 return;
9654 }
9655 SKIP(2);
9656
9657 curLength = ctxt->input->end - ctxt->input->cur;
9658 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9659 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9660 if ((curLength >= (size_t)(tlen + 1)) &&
9661 (ctxt->input->cur[tlen] == '>')) {
9662 ctxt->input->cur += tlen + 1;
9663 ctxt->input->col += tlen + 1;
9664 goto done;
9665 }
9666 ctxt->input->cur += tlen;
9667 ctxt->input->col += tlen;
9668 name = (xmlChar*)1;
9669 } else {
9670 if (prefix == NULL)
9671 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9672 else
9673 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9674 }
9675
9676 /*
9677 * We should definitely be at the ending "S? '>'" part
9678 */
9679 GROW;
9680 if (ctxt->instate == XML_PARSER_EOF)
9681 return;
9682 SKIP_BLANKS;
9683 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9684 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9685 } else
9686 NEXT1;
9687
9688 /*
9689 * [ WFC: Element Type Match ]
9690 * The Name in an element's end-tag must match the element type in the
9691 * start-tag.
9692 *
9693 */
9694 if (name != (xmlChar*)1) {
9695 if (name == NULL) name = BAD_CAST "unparseable";
9696 if ((line == 0) && (ctxt->node != NULL))
9697 line = ctxt->node->line;
9698 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9699 "Opening and ending tag mismatch: %s line %d and %s\n",
9700 ctxt->name, line, name);
9701 }
9702
9703 /*
9704 * SAX: End of Tag
9705 */
9706 done:
9707 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9708 (!ctxt->disableSAX))
9709 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9710
9711 spacePop(ctxt);
9712 if (nsNr != 0)
9713 nsPop(ctxt, nsNr);
9714 return;
9715 }
9716
/**
 * xmlParseCDSect:
 * @ctxt:  an XML parser context
 *
 * Parse escaped pure raw content.
 *
 * [18] CDSect ::= CDStart CData CDEnd
 *
 * [19] CDStart ::= '<![CDATA['
 *
 * [20] CData ::= (Char* - (Char* ']]>' Char*))
 *
 * [21] CDEnd ::= ']]>'
 *
 * Nothing is done if the input does not start with '<![CDATA['.
 * The section content is accumulated in a temporary buffer and handed
 * to the cdataBlock SAX callback, or to the characters callback when no
 * cdataBlock handler is installed. Unterminated or invalid content is
 * reported as XML_ERR_CDATA_NOT_FINISHED.
 */
void
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    /*
     * r, s and cur hold the three most recently read characters (oldest
     * first) and rl, sl and l their lengths as reported by CUR_CHAR.
     */
    int r, rl;
    int s, sl;
    int cur, l;
    int count = 0;

    /* Check 2.6.0 was NXT(0) not RAW */
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
        SKIP(9);
    } else
        return;

    ctxt->instate = XML_PARSER_CDATA_SECTION;
    /* Prime the window: at least two valid characters must follow. */
    r = CUR_CHAR(rl);
    if (!IS_CHAR(r)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(rl);
    s = CUR_CHAR(sl);
    if (!IS_CHAR(s)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(sl);
    cur = CUR_CHAR(l);
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return;
    }
    /*
     * Slide the window until it reads "]]>" or an invalid character is
     * met. On each turn the oldest character (r) is stored in buf, so
     * the two ']' of the terminator never reach the buffer.
     */
    while (IS_CHAR(cur) &&
           ((r != ']') || (s != ']') || (cur != '>'))) {
        /* Keep a few spare bytes for the next character and the final 0. */
        if (len + 5 >= size) {
            xmlChar *tmp;

            /* The size cap is lifted by the XML_PARSE_HUGE option. */
            if ((size > XML_MAX_TEXT_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
                             "CData section too big found", NULL);
                xmlFree (buf);
                return;
            }
            tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                return;
            }
            buf = tmp;
            size *= 2;
        }
        /* COPY_BUF presumably appends r (rl bytes) to buf and advances len. */
        COPY_BUF(rl,buf,len,r);
        r = s;
        rl = sl;
        s = cur;
        sl = l;
        count++;
        /* Ask for more input only once every 50 characters. */
        if (count > 50) {
            GROW;
            if (ctxt->instate == XML_PARSER_EOF) {
                xmlFree(buf);
                return;
            }
            count = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    buf[len] = 0;
    ctxt->instate = XML_PARSER_CONTENT;
    /* The loop ended on something other than the "]]>" terminator. */
    if (cur != '>') {
        xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
                             "CData section not finished\n%.50s\n", buf);
        xmlFree(buf);
        return;
    }
    /* Consume the closing '>'. */
    NEXTL(l);

    /*
     * OK the buffer is to be consumed as cdata.
     */
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
        if (ctxt->sax->cdataBlock != NULL)
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
        else if (ctxt->sax->characters != NULL)
            ctxt->sax->characters(ctxt->userData, buf, len);
    }
    xmlFree(buf);
}
9827
9828 /**
9829 * xmlParseContent:
9830 * @ctxt: an XML parser context
9831 *
9832 * Parse a content:
9833 *
9834 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9835 */
9836
9837 void
xmlParseContent(xmlParserCtxtPtr ctxt)9838 xmlParseContent(xmlParserCtxtPtr ctxt) {
9839 GROW;
9840 while ((RAW != 0) &&
9841 ((RAW != '<') || (NXT(1) != '/')) &&
9842 (ctxt->instate != XML_PARSER_EOF)) {
9843 const xmlChar *test = CUR_PTR;
9844 unsigned int cons = ctxt->input->consumed;
9845 const xmlChar *cur = ctxt->input->cur;
9846
9847 /*
9848 * First case : a Processing Instruction.
9849 */
9850 if ((*cur == '<') && (cur[1] == '?')) {
9851 xmlParsePI(ctxt);
9852 }
9853
9854 /*
9855 * Second case : a CDSection
9856 */
9857 /* 2.6.0 test was *cur not RAW */
9858 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9859 xmlParseCDSect(ctxt);
9860 }
9861
9862 /*
9863 * Third case : a comment
9864 */
9865 else if ((*cur == '<') && (NXT(1) == '!') &&
9866 (NXT(2) == '-') && (NXT(3) == '-')) {
9867 xmlParseComment(ctxt);
9868 ctxt->instate = XML_PARSER_CONTENT;
9869 }
9870
9871 /*
9872 * Fourth case : a sub-element.
9873 */
9874 else if (*cur == '<') {
9875 xmlParseElement(ctxt);
9876 }
9877
9878 /*
9879 * Fifth case : a reference. If if has not been resolved,
9880 * parsing returns it's Name, create the node
9881 */
9882
9883 else if (*cur == '&') {
9884 xmlParseReference(ctxt);
9885 }
9886
9887 /*
9888 * Last case, text. Note that References are handled directly.
9889 */
9890 else {
9891 xmlParseCharData(ctxt, 0);
9892 }
9893
9894 GROW;
9895 SHRINK;
9896
9897 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9898 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9899 "detected an error in element content\n");
9900 xmlHaltParser(ctxt);
9901 break;
9902 }
9903 }
9904 }
9905
9906 /**
9907 * xmlParseElement:
9908 * @ctxt: an XML parser context
9909 *
9910 * parse an XML element, this is highly recursive
9911 *
9912 * [39] element ::= EmptyElemTag | STag content ETag
9913 *
9914 * [ WFC: Element Type Match ]
9915 * The Name in an element's end-tag must match the element type in the
9916 * start-tag.
9917 *
9918 */
9919
9920 void
xmlParseElement(xmlParserCtxtPtr ctxt)9921 xmlParseElement(xmlParserCtxtPtr ctxt) {
9922 const xmlChar *name;
9923 const xmlChar *prefix = NULL;
9924 const xmlChar *URI = NULL;
9925 xmlParserNodeInfo node_info;
9926 int line, tlen = 0;
9927 xmlNodePtr ret;
9928 int nsNr = ctxt->nsNr;
9929
9930 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9931 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9932 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9933 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9934 xmlParserMaxDepth);
9935 xmlHaltParser(ctxt);
9936 return;
9937 }
9938
9939 /* Capture start position */
9940 if (ctxt->record_info) {
9941 node_info.begin_pos = ctxt->input->consumed +
9942 (CUR_PTR - ctxt->input->base);
9943 node_info.begin_line = ctxt->input->line;
9944 }
9945
9946 if (ctxt->spaceNr == 0)
9947 spacePush(ctxt, -1);
9948 else if (*ctxt->space == -2)
9949 spacePush(ctxt, -1);
9950 else
9951 spacePush(ctxt, *ctxt->space);
9952
9953 line = ctxt->input->line;
9954 #ifdef LIBXML_SAX1_ENABLED
9955 if (ctxt->sax2)
9956 #endif /* LIBXML_SAX1_ENABLED */
9957 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9958 #ifdef LIBXML_SAX1_ENABLED
9959 else
9960 name = xmlParseStartTag(ctxt);
9961 #endif /* LIBXML_SAX1_ENABLED */
9962 if (ctxt->instate == XML_PARSER_EOF)
9963 return;
9964 if (name == NULL) {
9965 spacePop(ctxt);
9966 return;
9967 }
9968 namePush(ctxt, name);
9969 ret = ctxt->node;
9970
9971 #ifdef LIBXML_VALID_ENABLED
9972 /*
9973 * [ VC: Root Element Type ]
9974 * The Name in the document type declaration must match the element
9975 * type of the root element.
9976 */
9977 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9978 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9979 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9980 #endif /* LIBXML_VALID_ENABLED */
9981
9982 /*
9983 * Check for an Empty Element.
9984 */
9985 if ((RAW == '/') && (NXT(1) == '>')) {
9986 SKIP(2);
9987 if (ctxt->sax2) {
9988 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9989 (!ctxt->disableSAX))
9990 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9991 #ifdef LIBXML_SAX1_ENABLED
9992 } else {
9993 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9994 (!ctxt->disableSAX))
9995 ctxt->sax->endElement(ctxt->userData, name);
9996 #endif /* LIBXML_SAX1_ENABLED */
9997 }
9998 namePop(ctxt);
9999 spacePop(ctxt);
10000 if (nsNr != ctxt->nsNr)
10001 nsPop(ctxt, ctxt->nsNr - nsNr);
10002 if ( ret != NULL && ctxt->record_info ) {
10003 node_info.end_pos = ctxt->input->consumed +
10004 (CUR_PTR - ctxt->input->base);
10005 node_info.end_line = ctxt->input->line;
10006 node_info.node = ret;
10007 xmlParserAddNodeInfo(ctxt, &node_info);
10008 }
10009 return;
10010 }
10011 if (RAW == '>') {
10012 NEXT1;
10013 } else {
10014 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10015 "Couldn't find end of Start Tag %s line %d\n",
10016 name, line, NULL);
10017
10018 /*
10019 * end of parsing of this node.
10020 */
10021 nodePop(ctxt);
10022 namePop(ctxt);
10023 spacePop(ctxt);
10024 if (nsNr != ctxt->nsNr)
10025 nsPop(ctxt, ctxt->nsNr - nsNr);
10026
10027 /*
10028 * Capture end position and add node
10029 */
10030 if ( ret != NULL && ctxt->record_info ) {
10031 node_info.end_pos = ctxt->input->consumed +
10032 (CUR_PTR - ctxt->input->base);
10033 node_info.end_line = ctxt->input->line;
10034 node_info.node = ret;
10035 xmlParserAddNodeInfo(ctxt, &node_info);
10036 }
10037 return;
10038 }
10039
10040 /*
10041 * Parse the content of the element:
10042 */
10043 xmlParseContent(ctxt);
10044 if (ctxt->instate == XML_PARSER_EOF)
10045 return;
10046 if (!IS_BYTE_CHAR(RAW)) {
10047 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10048 "Premature end of data in tag %s line %d\n",
10049 name, line, NULL);
10050
10051 /*
10052 * end of parsing of this node.
10053 */
10054 nodePop(ctxt);
10055 namePop(ctxt);
10056 spacePop(ctxt);
10057 if (nsNr != ctxt->nsNr)
10058 nsPop(ctxt, ctxt->nsNr - nsNr);
10059 return;
10060 }
10061
10062 /*
10063 * parse the end of tag: '</' should be here.
10064 */
10065 if (ctxt->sax2) {
10066 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10067 namePop(ctxt);
10068 }
10069 #ifdef LIBXML_SAX1_ENABLED
10070 else
10071 xmlParseEndTag1(ctxt, line);
10072 #endif /* LIBXML_SAX1_ENABLED */
10073
10074 /*
10075 * Capture end position and add node
10076 */
10077 if ( ret != NULL && ctxt->record_info ) {
10078 node_info.end_pos = ctxt->input->consumed +
10079 (CUR_PTR - ctxt->input->base);
10080 node_info.end_line = ctxt->input->line;
10081 node_info.node = ret;
10082 xmlParserAddNodeInfo(ctxt, &node_info);
10083 }
10084 }
10085
10086 /**
10087 * xmlParseVersionNum:
10088 * @ctxt: an XML parser context
10089 *
10090 * parse the XML version value.
10091 *
10092 * [26] VersionNum ::= '1.' [0-9]+
10093 *
10094 * In practice allow [0-9].[0-9]+ at that level
10095 *
10096 * Returns the string giving the XML version number, or NULL
10097 */
10098 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10099 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10100 xmlChar *buf = NULL;
10101 int len = 0;
10102 int size = 10;
10103 xmlChar cur;
10104
10105 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10106 if (buf == NULL) {
10107 xmlErrMemory(ctxt, NULL);
10108 return(NULL);
10109 }
10110 cur = CUR;
10111 if (!((cur >= '0') && (cur <= '9'))) {
10112 xmlFree(buf);
10113 return(NULL);
10114 }
10115 buf[len++] = cur;
10116 NEXT;
10117 cur=CUR;
10118 if (cur != '.') {
10119 xmlFree(buf);
10120 return(NULL);
10121 }
10122 buf[len++] = cur;
10123 NEXT;
10124 cur=CUR;
10125 while ((cur >= '0') && (cur <= '9')) {
10126 if (len + 1 >= size) {
10127 xmlChar *tmp;
10128
10129 size *= 2;
10130 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10131 if (tmp == NULL) {
10132 xmlFree(buf);
10133 xmlErrMemory(ctxt, NULL);
10134 return(NULL);
10135 }
10136 buf = tmp;
10137 }
10138 buf[len++] = cur;
10139 NEXT;
10140 cur=CUR;
10141 }
10142 buf[len] = 0;
10143 return(buf);
10144 }
10145
10146 /**
10147 * xmlParseVersionInfo:
10148 * @ctxt: an XML parser context
10149 *
10150 * parse the XML version.
10151 *
10152 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10153 *
10154 * [25] Eq ::= S? '=' S?
10155 *
10156 * Returns the version string, e.g. "1.0"
10157 */
10158
10159 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10160 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10161 xmlChar *version = NULL;
10162
10163 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10164 SKIP(7);
10165 SKIP_BLANKS;
10166 if (RAW != '=') {
10167 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10168 return(NULL);
10169 }
10170 NEXT;
10171 SKIP_BLANKS;
10172 if (RAW == '"') {
10173 NEXT;
10174 version = xmlParseVersionNum(ctxt);
10175 if (RAW != '"') {
10176 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10177 } else
10178 NEXT;
10179 } else if (RAW == '\''){
10180 NEXT;
10181 version = xmlParseVersionNum(ctxt);
10182 if (RAW != '\'') {
10183 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10184 } else
10185 NEXT;
10186 } else {
10187 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10188 }
10189 }
10190 return(version);
10191 }
10192
10193 /**
10194 * xmlParseEncName:
10195 * @ctxt: an XML parser context
10196 *
10197 * parse the XML encoding name
10198 *
10199 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10200 *
10201 * Returns the encoding name value or NULL
10202 */
10203 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10204 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10205 xmlChar *buf = NULL;
10206 int len = 0;
10207 int size = 10;
10208 xmlChar cur;
10209
10210 cur = CUR;
10211 if (((cur >= 'a') && (cur <= 'z')) ||
10212 ((cur >= 'A') && (cur <= 'Z'))) {
10213 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10214 if (buf == NULL) {
10215 xmlErrMemory(ctxt, NULL);
10216 return(NULL);
10217 }
10218
10219 buf[len++] = cur;
10220 NEXT;
10221 cur = CUR;
10222 while (((cur >= 'a') && (cur <= 'z')) ||
10223 ((cur >= 'A') && (cur <= 'Z')) ||
10224 ((cur >= '0') && (cur <= '9')) ||
10225 (cur == '.') || (cur == '_') ||
10226 (cur == '-')) {
10227 if (len + 1 >= size) {
10228 xmlChar *tmp;
10229
10230 size *= 2;
10231 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10232 if (tmp == NULL) {
10233 xmlErrMemory(ctxt, NULL);
10234 xmlFree(buf);
10235 return(NULL);
10236 }
10237 buf = tmp;
10238 }
10239 buf[len++] = cur;
10240 NEXT;
10241 cur = CUR;
10242 if (cur == 0) {
10243 SHRINK;
10244 GROW;
10245 cur = CUR;
10246 }
10247 }
10248 buf[len] = 0;
10249 } else {
10250 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10251 }
10252 return(buf);
10253 }
10254
10255 /**
10256 * xmlParseEncodingDecl:
10257 * @ctxt: an XML parser context
10258 *
10259 * parse the XML encoding declaration
10260 *
10261 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10262 *
10263 * this setups the conversion filters.
10264 *
10265 * Returns the encoding value or NULL
10266 */
10267
10268 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10269 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10270 xmlChar *encoding = NULL;
10271
10272 SKIP_BLANKS;
10273 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10274 SKIP(8);
10275 SKIP_BLANKS;
10276 if (RAW != '=') {
10277 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10278 return(NULL);
10279 }
10280 NEXT;
10281 SKIP_BLANKS;
10282 if (RAW == '"') {
10283 NEXT;
10284 encoding = xmlParseEncName(ctxt);
10285 if (RAW != '"') {
10286 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10287 xmlFree((xmlChar *) encoding);
10288 return(NULL);
10289 } else
10290 NEXT;
10291 } else if (RAW == '\''){
10292 NEXT;
10293 encoding = xmlParseEncName(ctxt);
10294 if (RAW != '\'') {
10295 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10296 xmlFree((xmlChar *) encoding);
10297 return(NULL);
10298 } else
10299 NEXT;
10300 } else {
10301 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10302 }
10303
10304 /*
10305 * Non standard parsing, allowing the user to ignore encoding
10306 */
10307 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10308 xmlFree((xmlChar *) encoding);
10309 return(NULL);
10310 }
10311
10312 /*
10313 * UTF-16 encoding stwich has already taken place at this stage,
10314 * more over the little-endian/big-endian selection is already done
10315 */
10316 if ((encoding != NULL) &&
10317 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10318 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10319 /*
10320 * If no encoding was passed to the parser, that we are
10321 * using UTF-16 and no decoder is present i.e. the
10322 * document is apparently UTF-8 compatible, then raise an
10323 * encoding mismatch fatal error
10324 */
10325 if ((ctxt->encoding == NULL) &&
10326 (ctxt->input->buf != NULL) &&
10327 (ctxt->input->buf->encoder == NULL)) {
10328 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10329 "Document labelled UTF-16 but has UTF-8 content\n");
10330 }
10331 if (ctxt->encoding != NULL)
10332 xmlFree((xmlChar *) ctxt->encoding);
10333 ctxt->encoding = encoding;
10334 }
10335 /*
10336 * UTF-8 encoding is handled natively
10337 */
10338 else if ((encoding != NULL) &&
10339 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10340 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10341 if (ctxt->encoding != NULL)
10342 xmlFree((xmlChar *) ctxt->encoding);
10343 ctxt->encoding = encoding;
10344 }
10345 else if (encoding != NULL) {
10346 xmlCharEncodingHandlerPtr handler;
10347
10348 if (ctxt->input->encoding != NULL)
10349 xmlFree((xmlChar *) ctxt->input->encoding);
10350 ctxt->input->encoding = encoding;
10351
10352 handler = xmlFindCharEncodingHandler((const char *) encoding);
10353 if (handler != NULL) {
10354 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10355 /* failed to convert */
10356 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10357 return(NULL);
10358 }
10359 } else {
10360 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10361 "Unsupported encoding %s\n", encoding);
10362 return(NULL);
10363 }
10364 }
10365 }
10366 return(encoding);
10367 }
10368
10369 /**
10370 * xmlParseSDDecl:
10371 * @ctxt: an XML parser context
10372 *
10373 * parse the XML standalone declaration
10374 *
10375 * [32] SDDecl ::= S 'standalone' Eq
10376 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10377 *
10378 * [ VC: Standalone Document Declaration ]
10379 * TODO The standalone document declaration must have the value "no"
10380 * if any external markup declarations contain declarations of:
10381 * - attributes with default values, if elements to which these
10382 * attributes apply appear in the document without specifications
10383 * of values for these attributes, or
10384 * - entities (other than amp, lt, gt, apos, quot), if references
10385 * to those entities appear in the document, or
10386 * - attributes with values subject to normalization, where the
10387 * attribute appears in the document with a value which will change
10388 * as a result of normalization, or
10389 * - element types with element content, if white space occurs directly
10390 * within any instance of those types.
10391 *
10392 * Returns:
10393 * 1 if standalone="yes"
10394 * 0 if standalone="no"
10395 * -2 if standalone attribute is missing or invalid
10396 * (A standalone value of -2 means that the XML declaration was found,
10397 * but no value was specified for the standalone attribute).
10398 */
10399
10400 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10401 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10402 int standalone = -2;
10403
10404 SKIP_BLANKS;
10405 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10406 SKIP(10);
10407 SKIP_BLANKS;
10408 if (RAW != '=') {
10409 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10410 return(standalone);
10411 }
10412 NEXT;
10413 SKIP_BLANKS;
10414 if (RAW == '\''){
10415 NEXT;
10416 if ((RAW == 'n') && (NXT(1) == 'o')) {
10417 standalone = 0;
10418 SKIP(2);
10419 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10420 (NXT(2) == 's')) {
10421 standalone = 1;
10422 SKIP(3);
10423 } else {
10424 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10425 }
10426 if (RAW != '\'') {
10427 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10428 } else
10429 NEXT;
10430 } else if (RAW == '"'){
10431 NEXT;
10432 if ((RAW == 'n') && (NXT(1) == 'o')) {
10433 standalone = 0;
10434 SKIP(2);
10435 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10436 (NXT(2) == 's')) {
10437 standalone = 1;
10438 SKIP(3);
10439 } else {
10440 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10441 }
10442 if (RAW != '"') {
10443 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10444 } else
10445 NEXT;
10446 } else {
10447 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10448 }
10449 }
10450 return(standalone);
10451 }
10452
10453 /**
10454 * xmlParseXMLDecl:
10455 * @ctxt: an XML parser context
10456 *
10457 * parse an XML declaration header
10458 *
10459 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10460 */
10461
10462 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10463 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10464 xmlChar *version;
10465
10466 /*
10467 * This value for standalone indicates that the document has an
10468 * XML declaration but it does not have a standalone attribute.
10469 * It will be overwritten later if a standalone attribute is found.
10470 */
10471 ctxt->input->standalone = -2;
10472
10473 /*
10474 * We know that '<?xml' is here.
10475 */
10476 SKIP(5);
10477
10478 if (!IS_BLANK_CH(RAW)) {
10479 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10480 "Blank needed after '<?xml'\n");
10481 }
10482 SKIP_BLANKS;
10483
10484 /*
10485 * We must have the VersionInfo here.
10486 */
10487 version = xmlParseVersionInfo(ctxt);
10488 if (version == NULL) {
10489 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10490 } else {
10491 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10492 /*
10493 * Changed here for XML-1.0 5th edition
10494 */
10495 if (ctxt->options & XML_PARSE_OLD10) {
10496 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10497 "Unsupported version '%s'\n",
10498 version);
10499 } else {
10500 if ((version[0] == '1') && ((version[1] == '.'))) {
10501 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10502 "Unsupported version '%s'\n",
10503 version, NULL);
10504 } else {
10505 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10506 "Unsupported version '%s'\n",
10507 version);
10508 }
10509 }
10510 }
10511 if (ctxt->version != NULL)
10512 xmlFree((void *) ctxt->version);
10513 ctxt->version = version;
10514 }
10515
10516 /*
10517 * We may have the encoding declaration
10518 */
10519 if (!IS_BLANK_CH(RAW)) {
10520 if ((RAW == '?') && (NXT(1) == '>')) {
10521 SKIP(2);
10522 return;
10523 }
10524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10525 }
10526 xmlParseEncodingDecl(ctxt);
10527 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10528 (ctxt->instate == XML_PARSER_EOF)) {
10529 /*
10530 * The XML REC instructs us to stop parsing right here
10531 */
10532 return;
10533 }
10534
10535 /*
10536 * We may have the standalone status.
10537 */
10538 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10539 if ((RAW == '?') && (NXT(1) == '>')) {
10540 SKIP(2);
10541 return;
10542 }
10543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10544 }
10545
10546 /*
10547 * We can grow the input buffer freely at that point
10548 */
10549 GROW;
10550
10551 SKIP_BLANKS;
10552 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10553
10554 SKIP_BLANKS;
10555 if ((RAW == '?') && (NXT(1) == '>')) {
10556 SKIP(2);
10557 } else if (RAW == '>') {
10558 /* Deprecated old WD ... */
10559 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10560 NEXT;
10561 } else {
10562 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10563 MOVETO_ENDTAG(CUR_PTR);
10564 NEXT;
10565 }
10566 }
10567
10568 /**
10569 * xmlParseMisc:
10570 * @ctxt: an XML parser context
10571 *
10572 * parse an XML Misc* optional field.
10573 *
10574 * [27] Misc ::= Comment | PI | S
10575 */
10576
10577 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10578 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10579 while ((ctxt->instate != XML_PARSER_EOF) &&
10580 (((RAW == '<') && (NXT(1) == '?')) ||
10581 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10582 IS_BLANK_CH(CUR))) {
10583 if ((RAW == '<') && (NXT(1) == '?')) {
10584 xmlParsePI(ctxt);
10585 } else if (IS_BLANK_CH(CUR)) {
10586 NEXT;
10587 } else
10588 xmlParseComment(ctxt);
10589 }
10590 }
10591
10592 /**
10593 * xmlParseDocument:
10594 * @ctxt: an XML parser context
10595 *
10596 * parse an XML document (and build a tree if using the standard SAX
10597 * interface).
10598 *
10599 * [1] document ::= prolog element Misc*
10600 *
10601 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10602 *
10603 * Returns 0, -1 in case of error. the parser context is augmented
10604 * as a result of the parsing.
10605 */
10606
10607 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10608 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10609 xmlChar start[4];
10610 xmlCharEncoding enc;
10611
10612 xmlInitParser();
10613
10614 if ((ctxt == NULL) || (ctxt->input == NULL))
10615 return(-1);
10616
10617 GROW;
10618
10619 /*
10620 * SAX: detecting the level.
10621 */
10622 xmlDetectSAX2(ctxt);
10623
10624 /*
10625 * SAX: beginning of the document processing.
10626 */
10627 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10629 if (ctxt->instate == XML_PARSER_EOF)
10630 return(-1);
10631
10632 if ((ctxt->encoding == NULL) &&
10633 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10634 /*
10635 * Get the 4 first bytes and decode the charset
10636 * if enc != XML_CHAR_ENCODING_NONE
10637 * plug some encoding conversion routines.
10638 */
10639 start[0] = RAW;
10640 start[1] = NXT(1);
10641 start[2] = NXT(2);
10642 start[3] = NXT(3);
10643 enc = xmlDetectCharEncoding(&start[0], 4);
10644 if (enc != XML_CHAR_ENCODING_NONE) {
10645 xmlSwitchEncoding(ctxt, enc);
10646 }
10647 }
10648
10649
10650 if (CUR == 0) {
10651 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10652 return(-1);
10653 }
10654
10655 /*
10656 * Check for the XMLDecl in the Prolog.
10657 * do not GROW here to avoid the detected encoder to decode more
10658 * than just the first line, unless the amount of data is really
10659 * too small to hold "<?xml version="1.0" encoding="foo"
10660 */
10661 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10662 GROW;
10663 }
10664 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10665
10666 /*
10667 * Note that we will switch encoding on the fly.
10668 */
10669 xmlParseXMLDecl(ctxt);
10670 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10671 (ctxt->instate == XML_PARSER_EOF)) {
10672 /*
10673 * The XML REC instructs us to stop parsing right here
10674 */
10675 return(-1);
10676 }
10677 ctxt->standalone = ctxt->input->standalone;
10678 SKIP_BLANKS;
10679 } else {
10680 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10681 }
10682 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10683 ctxt->sax->startDocument(ctxt->userData);
10684 if (ctxt->instate == XML_PARSER_EOF)
10685 return(-1);
10686 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10687 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10688 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10689 }
10690
10691 /*
10692 * The Misc part of the Prolog
10693 */
10694 GROW;
10695 xmlParseMisc(ctxt);
10696
10697 /*
10698 * Then possibly doc type declaration(s) and more Misc
10699 * (doctypedecl Misc*)?
10700 */
10701 GROW;
10702 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10703
10704 ctxt->inSubset = 1;
10705 xmlParseDocTypeDecl(ctxt);
10706 if (RAW == '[') {
10707 ctxt->instate = XML_PARSER_DTD;
10708 xmlParseInternalSubset(ctxt);
10709 if (ctxt->instate == XML_PARSER_EOF)
10710 return(-1);
10711 }
10712
10713 /*
10714 * Create and update the external subset.
10715 */
10716 ctxt->inSubset = 2;
10717 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10718 (!ctxt->disableSAX))
10719 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10720 ctxt->extSubSystem, ctxt->extSubURI);
10721 if (ctxt->instate == XML_PARSER_EOF)
10722 return(-1);
10723 ctxt->inSubset = 0;
10724
10725 xmlCleanSpecialAttr(ctxt);
10726
10727 ctxt->instate = XML_PARSER_PROLOG;
10728 xmlParseMisc(ctxt);
10729 }
10730
10731 /*
10732 * Time to start parsing the tree itself
10733 */
10734 GROW;
10735 if (RAW != '<') {
10736 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737 "Start tag expected, '<' not found\n");
10738 } else {
10739 ctxt->instate = XML_PARSER_CONTENT;
10740 xmlParseElement(ctxt);
10741 ctxt->instate = XML_PARSER_EPILOG;
10742
10743
10744 /*
10745 * The Misc part at the end
10746 */
10747 xmlParseMisc(ctxt);
10748
10749 if (RAW != 0) {
10750 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10751 }
10752 ctxt->instate = XML_PARSER_EOF;
10753 }
10754
10755 /*
10756 * SAX: end of the document processing.
10757 */
10758 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10759 ctxt->sax->endDocument(ctxt->userData);
10760
10761 /*
10762 * Remove locally kept entity definitions if the tree was not built
10763 */
10764 if ((ctxt->myDoc != NULL) &&
10765 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10766 xmlFreeDoc(ctxt->myDoc);
10767 ctxt->myDoc = NULL;
10768 }
10769
10770 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10771 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10772 if (ctxt->valid)
10773 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10774 if (ctxt->nsWellFormed)
10775 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10776 if (ctxt->options & XML_PARSE_OLD10)
10777 ctxt->myDoc->properties |= XML_DOC_OLD10;
10778 }
10779 if (! ctxt->wellFormed) {
10780 ctxt->valid = 0;
10781 return(-1);
10782 }
10783 return(0);
10784 }
10785
10786 /**
10787 * xmlParseExtParsedEnt:
10788 * @ctxt: an XML parser context
10789 *
10790 * parse a general parsed entity
10791 * An external general parsed entity is well-formed if it matches the
10792 * production labeled extParsedEnt.
10793 *
10794 * [78] extParsedEnt ::= TextDecl? content
10795 *
10796 * Returns 0, -1 in case of error. the parser context is augmented
10797 * as a result of the parsing.
10798 */
10799
10800 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10801 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10802 xmlChar start[4];
10803 xmlCharEncoding enc;
10804
10805 if ((ctxt == NULL) || (ctxt->input == NULL))
10806 return(-1);
10807
10808 xmlDefaultSAXHandlerInit();
10809
10810 xmlDetectSAX2(ctxt);
10811
10812 GROW;
10813
10814 /*
10815 * SAX: beginning of the document processing.
10816 */
10817 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10818 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10819
10820 /*
10821 * Get the 4 first bytes and decode the charset
10822 * if enc != XML_CHAR_ENCODING_NONE
10823 * plug some encoding conversion routines.
10824 */
10825 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10826 start[0] = RAW;
10827 start[1] = NXT(1);
10828 start[2] = NXT(2);
10829 start[3] = NXT(3);
10830 enc = xmlDetectCharEncoding(start, 4);
10831 if (enc != XML_CHAR_ENCODING_NONE) {
10832 xmlSwitchEncoding(ctxt, enc);
10833 }
10834 }
10835
10836
10837 if (CUR == 0) {
10838 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10839 }
10840
10841 /*
10842 * Check for the XMLDecl in the Prolog.
10843 */
10844 GROW;
10845 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10846
10847 /*
10848 * Note that we will switch encoding on the fly.
10849 */
10850 xmlParseXMLDecl(ctxt);
10851 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10852 /*
10853 * The XML REC instructs us to stop parsing right here
10854 */
10855 return(-1);
10856 }
10857 SKIP_BLANKS;
10858 } else {
10859 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10860 }
10861 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10862 ctxt->sax->startDocument(ctxt->userData);
10863 if (ctxt->instate == XML_PARSER_EOF)
10864 return(-1);
10865
10866 /*
10867 * Doing validity checking on chunk doesn't make sense
10868 */
10869 ctxt->instate = XML_PARSER_CONTENT;
10870 ctxt->validate = 0;
10871 ctxt->loadsubset = 0;
10872 ctxt->depth = 0;
10873
10874 xmlParseContent(ctxt);
10875 if (ctxt->instate == XML_PARSER_EOF)
10876 return(-1);
10877
10878 if ((RAW == '<') && (NXT(1) == '/')) {
10879 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10880 } else if (RAW != 0) {
10881 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10882 }
10883
10884 /*
10885 * SAX: end of the document processing.
10886 */
10887 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10888 ctxt->sax->endDocument(ctxt->userData);
10889
10890 if (! ctxt->wellFormed) return(-1);
10891 return(0);
10892 }
10893
10894 #ifdef LIBXML_PUSH_ENABLED
10895 /************************************************************************
10896 * *
10897 * Progressive parsing interfaces *
10898 * *
10899 ************************************************************************/
10900
10901 /**
10902 * xmlParseLookupSequence:
10903 * @ctxt: an XML parser context
10904 * @first: the first char to lookup
10905 * @next: the next char to lookup or zero
10906 * @third: the next char to lookup or zero
10907 *
10908 * Try to find if a sequence (first, next, third) or just (first next) or
10909 * (first) is available in the input stream.
10910 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10911 * to avoid rescanning sequences of bytes, it DOES change the state of the
10912 * parser, do not use liberally.
10913 *
10914 * Returns the index to the current parsing point if the full sequence
10915 * is available, -1 otherwise.
10916 */
10917 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10918 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10919 xmlChar next, xmlChar third) {
10920 int base, len;
10921 xmlParserInputPtr in;
10922 const xmlChar *buf;
10923
10924 in = ctxt->input;
10925 if (in == NULL) return(-1);
10926 base = in->cur - in->base;
10927 if (base < 0) return(-1);
10928 if (ctxt->checkIndex > base)
10929 base = ctxt->checkIndex;
10930 if (in->buf == NULL) {
10931 buf = in->base;
10932 len = in->length;
10933 } else {
10934 buf = xmlBufContent(in->buf->buffer);
10935 len = xmlBufUse(in->buf->buffer);
10936 }
10937 /* take into account the sequence length */
10938 if (third) len -= 2;
10939 else if (next) len --;
10940 for (;base < len;base++) {
10941 if (buf[base] == first) {
10942 if (third != 0) {
10943 if ((buf[base + 1] != next) ||
10944 (buf[base + 2] != third)) continue;
10945 } else if (next != 0) {
10946 if (buf[base + 1] != next) continue;
10947 }
10948 ctxt->checkIndex = 0;
10949 #ifdef DEBUG_PUSH
10950 if (next == 0)
10951 xmlGenericError(xmlGenericErrorContext,
10952 "PP: lookup '%c' found at %d\n",
10953 first, base);
10954 else if (third == 0)
10955 xmlGenericError(xmlGenericErrorContext,
10956 "PP: lookup '%c%c' found at %d\n",
10957 first, next, base);
10958 else
10959 xmlGenericError(xmlGenericErrorContext,
10960 "PP: lookup '%c%c%c' found at %d\n",
10961 first, next, third, base);
10962 #endif
10963 return(base - (in->cur - in->base));
10964 }
10965 }
10966 ctxt->checkIndex = base;
10967 #ifdef DEBUG_PUSH
10968 if (next == 0)
10969 xmlGenericError(xmlGenericErrorContext,
10970 "PP: lookup '%c' failed\n", first);
10971 else if (third == 0)
10972 xmlGenericError(xmlGenericErrorContext,
10973 "PP: lookup '%c%c' failed\n", first, next);
10974 else
10975 xmlGenericError(xmlGenericErrorContext,
10976 "PP: lookup '%c%c%c' failed\n", first, next, third);
10977 #endif
10978 return(-1);
10979 }
10980
10981 /**
10982 * xmlParseGetLasts:
10983 * @ctxt: an XML parser context
10984 * @lastlt: pointer to store the last '<' from the input
10985 * @lastgt: pointer to store the last '>' from the input
10986 *
10987 * Lookup the last < and > in the current chunk
10988 */
10989 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10990 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10991 const xmlChar **lastgt) {
10992 const xmlChar *tmp;
10993
10994 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10995 xmlGenericError(xmlGenericErrorContext,
10996 "Internal error: xmlParseGetLasts\n");
10997 return;
10998 }
10999 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11000 tmp = ctxt->input->end;
11001 tmp--;
11002 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11003 if (tmp < ctxt->input->base) {
11004 *lastlt = NULL;
11005 *lastgt = NULL;
11006 } else {
11007 *lastlt = tmp;
11008 tmp++;
11009 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11010 if (*tmp == '\'') {
11011 tmp++;
11012 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11013 if (tmp < ctxt->input->end) tmp++;
11014 } else if (*tmp == '"') {
11015 tmp++;
11016 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11017 if (tmp < ctxt->input->end) tmp++;
11018 } else
11019 tmp++;
11020 }
11021 if (tmp < ctxt->input->end)
11022 *lastgt = tmp;
11023 else {
11024 tmp = *lastlt;
11025 tmp--;
11026 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11027 if (tmp >= ctxt->input->base)
11028 *lastgt = tmp;
11029 else
11030 *lastgt = NULL;
11031 }
11032 }
11033 } else {
11034 *lastlt = NULL;
11035 *lastgt = NULL;
11036 }
11037 }
11038 /**
11039 * xmlCheckCdataPush:
11040 * @cur: pointer to the block of characters
11041 * @len: length of the block in bytes
11042 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11043 *
11044 * Check that the block of characters is okay as SCdata content [20]
11045 *
11046 * Returns the number of bytes to pass if okay, a negative index where an
11047 * UTF-8 error occurred otherwise
11048 */
11049 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11050 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11051 int ix;
11052 unsigned char c;
11053 int codepoint;
11054
11055 if ((utf == NULL) || (len <= 0))
11056 return(0);
11057
11058 for (ix = 0; ix < len;) { /* string is 0-terminated */
11059 c = utf[ix];
11060 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11061 if (c >= 0x20)
11062 ix++;
11063 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11064 ix++;
11065 else
11066 return(-ix);
11067 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11068 if (ix + 2 > len) return(complete ? -ix : ix);
11069 if ((utf[ix+1] & 0xc0 ) != 0x80)
11070 return(-ix);
11071 codepoint = (utf[ix] & 0x1f) << 6;
11072 codepoint |= utf[ix+1] & 0x3f;
11073 if (!xmlIsCharQ(codepoint))
11074 return(-ix);
11075 ix += 2;
11076 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11077 if (ix + 3 > len) return(complete ? -ix : ix);
11078 if (((utf[ix+1] & 0xc0) != 0x80) ||
11079 ((utf[ix+2] & 0xc0) != 0x80))
11080 return(-ix);
11081 codepoint = (utf[ix] & 0xf) << 12;
11082 codepoint |= (utf[ix+1] & 0x3f) << 6;
11083 codepoint |= utf[ix+2] & 0x3f;
11084 if (!xmlIsCharQ(codepoint))
11085 return(-ix);
11086 ix += 3;
11087 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11088 if (ix + 4 > len) return(complete ? -ix : ix);
11089 if (((utf[ix+1] & 0xc0) != 0x80) ||
11090 ((utf[ix+2] & 0xc0) != 0x80) ||
11091 ((utf[ix+3] & 0xc0) != 0x80))
11092 return(-ix);
11093 codepoint = (utf[ix] & 0x7) << 18;
11094 codepoint |= (utf[ix+1] & 0x3f) << 12;
11095 codepoint |= (utf[ix+2] & 0x3f) << 6;
11096 codepoint |= utf[ix+3] & 0x3f;
11097 if (!xmlIsCharQ(codepoint))
11098 return(-ix);
11099 ix += 4;
11100 } else /* unknown encoding */
11101 return(-ix);
11102 }
11103 return(ix);
11104 }
11105
11106 /**
11107 * xmlParseTryOrFinish:
11108 * @ctxt: an XML parser context
11109 * @terminate: last chunk indicator
11110 *
11111 * Try to progress on parsing
11112 *
11113 * Returns zero if no parsing was possible
11114 */
11115 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11116 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11117 int ret = 0;
11118 int avail, tlen;
11119 xmlChar cur, next;
11120 const xmlChar *lastlt, *lastgt;
11121
11122 if (ctxt->input == NULL)
11123 return(0);
11124
11125 #ifdef DEBUG_PUSH
11126 switch (ctxt->instate) {
11127 case XML_PARSER_EOF:
11128 xmlGenericError(xmlGenericErrorContext,
11129 "PP: try EOF\n"); break;
11130 case XML_PARSER_START:
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: try START\n"); break;
11133 case XML_PARSER_MISC:
11134 xmlGenericError(xmlGenericErrorContext,
11135 "PP: try MISC\n");break;
11136 case XML_PARSER_COMMENT:
11137 xmlGenericError(xmlGenericErrorContext,
11138 "PP: try COMMENT\n");break;
11139 case XML_PARSER_PROLOG:
11140 xmlGenericError(xmlGenericErrorContext,
11141 "PP: try PROLOG\n");break;
11142 case XML_PARSER_START_TAG:
11143 xmlGenericError(xmlGenericErrorContext,
11144 "PP: try START_TAG\n");break;
11145 case XML_PARSER_CONTENT:
11146 xmlGenericError(xmlGenericErrorContext,
11147 "PP: try CONTENT\n");break;
11148 case XML_PARSER_CDATA_SECTION:
11149 xmlGenericError(xmlGenericErrorContext,
11150 "PP: try CDATA_SECTION\n");break;
11151 case XML_PARSER_END_TAG:
11152 xmlGenericError(xmlGenericErrorContext,
11153 "PP: try END_TAG\n");break;
11154 case XML_PARSER_ENTITY_DECL:
11155 xmlGenericError(xmlGenericErrorContext,
11156 "PP: try ENTITY_DECL\n");break;
11157 case XML_PARSER_ENTITY_VALUE:
11158 xmlGenericError(xmlGenericErrorContext,
11159 "PP: try ENTITY_VALUE\n");break;
11160 case XML_PARSER_ATTRIBUTE_VALUE:
11161 xmlGenericError(xmlGenericErrorContext,
11162 "PP: try ATTRIBUTE_VALUE\n");break;
11163 case XML_PARSER_DTD:
11164 xmlGenericError(xmlGenericErrorContext,
11165 "PP: try DTD\n");break;
11166 case XML_PARSER_EPILOG:
11167 xmlGenericError(xmlGenericErrorContext,
11168 "PP: try EPILOG\n");break;
11169 case XML_PARSER_PI:
11170 xmlGenericError(xmlGenericErrorContext,
11171 "PP: try PI\n");break;
11172 case XML_PARSER_IGNORE:
11173 xmlGenericError(xmlGenericErrorContext,
11174 "PP: try IGNORE\n");break;
11175 }
11176 #endif
11177
11178 if ((ctxt->input != NULL) &&
11179 (ctxt->input->cur - ctxt->input->base > 4096)) {
11180 xmlSHRINK(ctxt);
11181 ctxt->checkIndex = 0;
11182 }
11183 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11184
11185 while (ctxt->instate != XML_PARSER_EOF) {
11186 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11187 return(0);
11188
11189 if (ctxt->input == NULL) break;
11190 if (ctxt->input->buf == NULL)
11191 avail = ctxt->input->length -
11192 (ctxt->input->cur - ctxt->input->base);
11193 else {
11194 /*
11195 * If we are operating on converted input, try to flush
11196 * remainng chars to avoid them stalling in the non-converted
11197 * buffer. But do not do this in document start where
11198 * encoding="..." may not have been read and we work on a
11199 * guessed encoding.
11200 */
11201 if ((ctxt->instate != XML_PARSER_START) &&
11202 (ctxt->input->buf->raw != NULL) &&
11203 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11204 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11205 ctxt->input);
11206 size_t current = ctxt->input->cur - ctxt->input->base;
11207
11208 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11209 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11210 base, current);
11211 }
11212 avail = xmlBufUse(ctxt->input->buf->buffer) -
11213 (ctxt->input->cur - ctxt->input->base);
11214 }
11215 if (avail < 1)
11216 goto done;
11217 switch (ctxt->instate) {
11218 case XML_PARSER_EOF:
11219 /*
11220 * Document parsing is done !
11221 */
11222 goto done;
11223 case XML_PARSER_START:
11224 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11225 xmlChar start[4];
11226 xmlCharEncoding enc;
11227
11228 /*
11229 * Very first chars read from the document flow.
11230 */
11231 if (avail < 4)
11232 goto done;
11233
11234 /*
11235 * Get the 4 first bytes and decode the charset
11236 * if enc != XML_CHAR_ENCODING_NONE
11237 * plug some encoding conversion routines,
11238 * else xmlSwitchEncoding will set to (default)
11239 * UTF8.
11240 */
11241 start[0] = RAW;
11242 start[1] = NXT(1);
11243 start[2] = NXT(2);
11244 start[3] = NXT(3);
11245 enc = xmlDetectCharEncoding(start, 4);
11246 xmlSwitchEncoding(ctxt, enc);
11247 break;
11248 }
11249
11250 if (avail < 2)
11251 goto done;
11252 cur = ctxt->input->cur[0];
11253 next = ctxt->input->cur[1];
11254 if (cur == 0) {
11255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256 ctxt->sax->setDocumentLocator(ctxt->userData,
11257 &xmlDefaultSAXLocator);
11258 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11259 xmlHaltParser(ctxt);
11260 #ifdef DEBUG_PUSH
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: entering EOF\n");
11263 #endif
11264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265 ctxt->sax->endDocument(ctxt->userData);
11266 goto done;
11267 }
11268 if ((cur == '<') && (next == '?')) {
11269 /* PI or XML decl */
11270 if (avail < 5) return(ret);
11271 if ((!terminate) &&
11272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11273 return(ret);
11274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275 ctxt->sax->setDocumentLocator(ctxt->userData,
11276 &xmlDefaultSAXLocator);
11277 if ((ctxt->input->cur[2] == 'x') &&
11278 (ctxt->input->cur[3] == 'm') &&
11279 (ctxt->input->cur[4] == 'l') &&
11280 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11281 ret += 5;
11282 #ifdef DEBUG_PUSH
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: Parsing XML Decl\n");
11285 #endif
11286 xmlParseXMLDecl(ctxt);
11287 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11288 /*
11289 * The XML REC instructs us to stop parsing right
11290 * here
11291 */
11292 xmlHaltParser(ctxt);
11293 return(0);
11294 }
11295 ctxt->standalone = ctxt->input->standalone;
11296 if ((ctxt->encoding == NULL) &&
11297 (ctxt->input->encoding != NULL))
11298 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300 (!ctxt->disableSAX))
11301 ctxt->sax->startDocument(ctxt->userData);
11302 ctxt->instate = XML_PARSER_MISC;
11303 #ifdef DEBUG_PUSH
11304 xmlGenericError(xmlGenericErrorContext,
11305 "PP: entering MISC\n");
11306 #endif
11307 } else {
11308 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310 (!ctxt->disableSAX))
11311 ctxt->sax->startDocument(ctxt->userData);
11312 ctxt->instate = XML_PARSER_MISC;
11313 #ifdef DEBUG_PUSH
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: entering MISC\n");
11316 #endif
11317 }
11318 } else {
11319 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320 ctxt->sax->setDocumentLocator(ctxt->userData,
11321 &xmlDefaultSAXLocator);
11322 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11323 if (ctxt->version == NULL) {
11324 xmlErrMemory(ctxt, NULL);
11325 break;
11326 }
11327 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328 (!ctxt->disableSAX))
11329 ctxt->sax->startDocument(ctxt->userData);
11330 ctxt->instate = XML_PARSER_MISC;
11331 #ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering MISC\n");
11334 #endif
11335 }
11336 break;
11337 case XML_PARSER_START_TAG: {
11338 const xmlChar *name;
11339 const xmlChar *prefix = NULL;
11340 const xmlChar *URI = NULL;
11341 int nsNr = ctxt->nsNr;
11342
11343 if ((avail < 2) && (ctxt->inputNr == 1))
11344 goto done;
11345 cur = ctxt->input->cur[0];
11346 if (cur != '<') {
11347 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11348 xmlHaltParser(ctxt);
11349 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350 ctxt->sax->endDocument(ctxt->userData);
11351 goto done;
11352 }
11353 if (!terminate) {
11354 if (ctxt->progressive) {
11355 /* > can be found unescaped in attribute values */
11356 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11357 goto done;
11358 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11359 goto done;
11360 }
11361 }
11362 if (ctxt->spaceNr == 0)
11363 spacePush(ctxt, -1);
11364 else if (*ctxt->space == -2)
11365 spacePush(ctxt, -1);
11366 else
11367 spacePush(ctxt, *ctxt->space);
11368 #ifdef LIBXML_SAX1_ENABLED
11369 if (ctxt->sax2)
11370 #endif /* LIBXML_SAX1_ENABLED */
11371 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11372 #ifdef LIBXML_SAX1_ENABLED
11373 else
11374 name = xmlParseStartTag(ctxt);
11375 #endif /* LIBXML_SAX1_ENABLED */
11376 if (ctxt->instate == XML_PARSER_EOF)
11377 goto done;
11378 if (name == NULL) {
11379 spacePop(ctxt);
11380 xmlHaltParser(ctxt);
11381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382 ctxt->sax->endDocument(ctxt->userData);
11383 goto done;
11384 }
11385 #ifdef LIBXML_VALID_ENABLED
11386 /*
11387 * [ VC: Root Element Type ]
11388 * The Name in the document type declaration must match
11389 * the element type of the root element.
11390 */
11391 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11394 #endif /* LIBXML_VALID_ENABLED */
11395
11396 /*
11397 * Check for an Empty Element.
11398 */
11399 if ((RAW == '/') && (NXT(1) == '>')) {
11400 SKIP(2);
11401
11402 if (ctxt->sax2) {
11403 if ((ctxt->sax != NULL) &&
11404 (ctxt->sax->endElementNs != NULL) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->endElementNs(ctxt->userData, name,
11407 prefix, URI);
11408 if (ctxt->nsNr - nsNr > 0)
11409 nsPop(ctxt, ctxt->nsNr - nsNr);
11410 #ifdef LIBXML_SAX1_ENABLED
11411 } else {
11412 if ((ctxt->sax != NULL) &&
11413 (ctxt->sax->endElement != NULL) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->endElement(ctxt->userData, name);
11416 #endif /* LIBXML_SAX1_ENABLED */
11417 }
11418 if (ctxt->instate == XML_PARSER_EOF)
11419 goto done;
11420 spacePop(ctxt);
11421 if (ctxt->nameNr == 0) {
11422 ctxt->instate = XML_PARSER_EPILOG;
11423 } else {
11424 ctxt->instate = XML_PARSER_CONTENT;
11425 }
11426 ctxt->progressive = 1;
11427 break;
11428 }
11429 if (RAW == '>') {
11430 NEXT;
11431 } else {
11432 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11433 "Couldn't find end of Start Tag %s\n",
11434 name);
11435 nodePop(ctxt);
11436 spacePop(ctxt);
11437 }
11438 if (ctxt->sax2)
11439 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11440 #ifdef LIBXML_SAX1_ENABLED
11441 else
11442 namePush(ctxt, name);
11443 #endif /* LIBXML_SAX1_ENABLED */
11444
11445 ctxt->instate = XML_PARSER_CONTENT;
11446 ctxt->progressive = 1;
11447 break;
11448 }
11449 case XML_PARSER_CONTENT: {
11450 const xmlChar *test;
11451 unsigned int cons;
11452 if ((avail < 2) && (ctxt->inputNr == 1))
11453 goto done;
11454 cur = ctxt->input->cur[0];
11455 next = ctxt->input->cur[1];
11456
11457 test = CUR_PTR;
11458 cons = ctxt->input->consumed;
11459 if ((cur == '<') && (next == '/')) {
11460 ctxt->instate = XML_PARSER_END_TAG;
11461 break;
11462 } else if ((cur == '<') && (next == '?')) {
11463 if ((!terminate) &&
11464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11465 ctxt->progressive = XML_PARSER_PI;
11466 goto done;
11467 }
11468 xmlParsePI(ctxt);
11469 ctxt->instate = XML_PARSER_CONTENT;
11470 ctxt->progressive = 1;
11471 } else if ((cur == '<') && (next != '!')) {
11472 ctxt->instate = XML_PARSER_START_TAG;
11473 break;
11474 } else if ((cur == '<') && (next == '!') &&
11475 (ctxt->input->cur[2] == '-') &&
11476 (ctxt->input->cur[3] == '-')) {
11477 int term;
11478
11479 if (avail < 4)
11480 goto done;
11481 ctxt->input->cur += 4;
11482 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483 ctxt->input->cur -= 4;
11484 if ((!terminate) && (term < 0)) {
11485 ctxt->progressive = XML_PARSER_COMMENT;
11486 goto done;
11487 }
11488 xmlParseComment(ctxt);
11489 ctxt->instate = XML_PARSER_CONTENT;
11490 ctxt->progressive = 1;
11491 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492 (ctxt->input->cur[2] == '[') &&
11493 (ctxt->input->cur[3] == 'C') &&
11494 (ctxt->input->cur[4] == 'D') &&
11495 (ctxt->input->cur[5] == 'A') &&
11496 (ctxt->input->cur[6] == 'T') &&
11497 (ctxt->input->cur[7] == 'A') &&
11498 (ctxt->input->cur[8] == '[')) {
11499 SKIP(9);
11500 ctxt->instate = XML_PARSER_CDATA_SECTION;
11501 break;
11502 } else if ((cur == '<') && (next == '!') &&
11503 (avail < 9)) {
11504 goto done;
11505 } else if (cur == '&') {
11506 if ((!terminate) &&
11507 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11508 goto done;
11509 xmlParseReference(ctxt);
11510 } else {
11511 /* TODO Avoid the extra copy, handle directly !!! */
11512 /*
11513 * Goal of the following test is:
11514 * - minimize calls to the SAX 'character' callback
11515 * when they are mergeable
11516 * - handle an problem for isBlank when we only parse
11517 * a sequence of blank chars and the next one is
11518 * not available to check against '<' presence.
11519 * - tries to homogenize the differences in SAX
11520 * callbacks between the push and pull versions
11521 * of the parser.
11522 */
11523 if ((ctxt->inputNr == 1) &&
11524 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11525 if (!terminate) {
11526 if (ctxt->progressive) {
11527 if ((lastlt == NULL) ||
11528 (ctxt->input->cur > lastlt))
11529 goto done;
11530 } else if (xmlParseLookupSequence(ctxt,
11531 '<', 0, 0) < 0) {
11532 goto done;
11533 }
11534 }
11535 }
11536 ctxt->checkIndex = 0;
11537 xmlParseCharData(ctxt, 0);
11538 }
11539 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11540 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11541 "detected an error in element content\n");
11542 xmlHaltParser(ctxt);
11543 break;
11544 }
11545 break;
11546 }
11547 case XML_PARSER_END_TAG:
11548 if (avail < 2)
11549 goto done;
11550 if (!terminate) {
11551 if (ctxt->progressive) {
11552 /* > can be found unescaped in attribute values */
11553 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11554 goto done;
11555 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11556 goto done;
11557 }
11558 }
11559 if (ctxt->sax2) {
11560 xmlParseEndTag2(ctxt,
11561 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11562 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11563 (int) (ptrdiff_t)
11564 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11565 nameNsPop(ctxt);
11566 }
11567 #ifdef LIBXML_SAX1_ENABLED
11568 else
11569 xmlParseEndTag1(ctxt, 0);
11570 #endif /* LIBXML_SAX1_ENABLED */
11571 if (ctxt->instate == XML_PARSER_EOF) {
11572 /* Nothing */
11573 } else if (ctxt->nameNr == 0) {
11574 ctxt->instate = XML_PARSER_EPILOG;
11575 } else {
11576 ctxt->instate = XML_PARSER_CONTENT;
11577 }
11578 break;
11579 case XML_PARSER_CDATA_SECTION: {
11580 /*
11581 * The Push mode need to have the SAX callback for
11582 * cdataBlock merge back contiguous callbacks.
11583 */
11584 int base;
11585
11586 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11587 if (base < 0) {
11588 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11589 int tmp;
11590
11591 tmp = xmlCheckCdataPush(ctxt->input->cur,
11592 XML_PARSER_BIG_BUFFER_SIZE, 0);
11593 if (tmp < 0) {
11594 tmp = -tmp;
11595 ctxt->input->cur += tmp;
11596 goto encoding_error;
11597 }
11598 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11599 if (ctxt->sax->cdataBlock != NULL)
11600 ctxt->sax->cdataBlock(ctxt->userData,
11601 ctxt->input->cur, tmp);
11602 else if (ctxt->sax->characters != NULL)
11603 ctxt->sax->characters(ctxt->userData,
11604 ctxt->input->cur, tmp);
11605 }
11606 if (ctxt->instate == XML_PARSER_EOF)
11607 goto done;
11608 SKIPL(tmp);
11609 ctxt->checkIndex = 0;
11610 }
11611 goto done;
11612 } else {
11613 int tmp;
11614
11615 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11616 if ((tmp < 0) || (tmp != base)) {
11617 tmp = -tmp;
11618 ctxt->input->cur += tmp;
11619 goto encoding_error;
11620 }
11621 if ((ctxt->sax != NULL) && (base == 0) &&
11622 (ctxt->sax->cdataBlock != NULL) &&
11623 (!ctxt->disableSAX)) {
11624 /*
11625 * Special case to provide identical behaviour
11626 * between pull and push parsers on enpty CDATA
11627 * sections
11628 */
11629 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11630 (!strncmp((const char *)&ctxt->input->cur[-9],
11631 "<![CDATA[", 9)))
11632 ctxt->sax->cdataBlock(ctxt->userData,
11633 BAD_CAST "", 0);
11634 } else if ((ctxt->sax != NULL) && (base > 0) &&
11635 (!ctxt->disableSAX)) {
11636 if (ctxt->sax->cdataBlock != NULL)
11637 ctxt->sax->cdataBlock(ctxt->userData,
11638 ctxt->input->cur, base);
11639 else if (ctxt->sax->characters != NULL)
11640 ctxt->sax->characters(ctxt->userData,
11641 ctxt->input->cur, base);
11642 }
11643 if (ctxt->instate == XML_PARSER_EOF)
11644 goto done;
11645 SKIPL(base + 3);
11646 ctxt->checkIndex = 0;
11647 ctxt->instate = XML_PARSER_CONTENT;
11648 #ifdef DEBUG_PUSH
11649 xmlGenericError(xmlGenericErrorContext,
11650 "PP: entering CONTENT\n");
11651 #endif
11652 }
11653 break;
11654 }
11655 case XML_PARSER_MISC:
11656 SKIP_BLANKS;
11657 if (ctxt->input->buf == NULL)
11658 avail = ctxt->input->length -
11659 (ctxt->input->cur - ctxt->input->base);
11660 else
11661 avail = xmlBufUse(ctxt->input->buf->buffer) -
11662 (ctxt->input->cur - ctxt->input->base);
11663 if (avail < 2)
11664 goto done;
11665 cur = ctxt->input->cur[0];
11666 next = ctxt->input->cur[1];
11667 if ((cur == '<') && (next == '?')) {
11668 if ((!terminate) &&
11669 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11670 ctxt->progressive = XML_PARSER_PI;
11671 goto done;
11672 }
11673 #ifdef DEBUG_PUSH
11674 xmlGenericError(xmlGenericErrorContext,
11675 "PP: Parsing PI\n");
11676 #endif
11677 xmlParsePI(ctxt);
11678 if (ctxt->instate == XML_PARSER_EOF)
11679 goto done;
11680 ctxt->instate = XML_PARSER_MISC;
11681 ctxt->progressive = 1;
11682 ctxt->checkIndex = 0;
11683 } else if ((cur == '<') && (next == '!') &&
11684 (ctxt->input->cur[2] == '-') &&
11685 (ctxt->input->cur[3] == '-')) {
11686 if ((!terminate) &&
11687 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11688 ctxt->progressive = XML_PARSER_COMMENT;
11689 goto done;
11690 }
11691 #ifdef DEBUG_PUSH
11692 xmlGenericError(xmlGenericErrorContext,
11693 "PP: Parsing Comment\n");
11694 #endif
11695 xmlParseComment(ctxt);
11696 if (ctxt->instate == XML_PARSER_EOF)
11697 goto done;
11698 ctxt->instate = XML_PARSER_MISC;
11699 ctxt->progressive = 1;
11700 ctxt->checkIndex = 0;
11701 } else if ((cur == '<') && (next == '!') &&
11702 (ctxt->input->cur[2] == 'D') &&
11703 (ctxt->input->cur[3] == 'O') &&
11704 (ctxt->input->cur[4] == 'C') &&
11705 (ctxt->input->cur[5] == 'T') &&
11706 (ctxt->input->cur[6] == 'Y') &&
11707 (ctxt->input->cur[7] == 'P') &&
11708 (ctxt->input->cur[8] == 'E')) {
11709 if ((!terminate) &&
11710 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11711 ctxt->progressive = XML_PARSER_DTD;
11712 goto done;
11713 }
11714 #ifdef DEBUG_PUSH
11715 xmlGenericError(xmlGenericErrorContext,
11716 "PP: Parsing internal subset\n");
11717 #endif
11718 ctxt->inSubset = 1;
11719 ctxt->progressive = 0;
11720 ctxt->checkIndex = 0;
11721 xmlParseDocTypeDecl(ctxt);
11722 if (ctxt->instate == XML_PARSER_EOF)
11723 goto done;
11724 if (RAW == '[') {
11725 ctxt->instate = XML_PARSER_DTD;
11726 #ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering DTD\n");
11729 #endif
11730 } else {
11731 /*
11732 * Create and update the external subset.
11733 */
11734 ctxt->inSubset = 2;
11735 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11736 (ctxt->sax->externalSubset != NULL))
11737 ctxt->sax->externalSubset(ctxt->userData,
11738 ctxt->intSubName, ctxt->extSubSystem,
11739 ctxt->extSubURI);
11740 ctxt->inSubset = 0;
11741 xmlCleanSpecialAttr(ctxt);
11742 ctxt->instate = XML_PARSER_PROLOG;
11743 #ifdef DEBUG_PUSH
11744 xmlGenericError(xmlGenericErrorContext,
11745 "PP: entering PROLOG\n");
11746 #endif
11747 }
11748 } else if ((cur == '<') && (next == '!') &&
11749 (avail < 9)) {
11750 goto done;
11751 } else {
11752 ctxt->instate = XML_PARSER_START_TAG;
11753 ctxt->progressive = XML_PARSER_START_TAG;
11754 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11755 #ifdef DEBUG_PUSH
11756 xmlGenericError(xmlGenericErrorContext,
11757 "PP: entering START_TAG\n");
11758 #endif
11759 }
11760 break;
11761 case XML_PARSER_PROLOG:
11762 SKIP_BLANKS;
11763 if (ctxt->input->buf == NULL)
11764 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11765 else
11766 avail = xmlBufUse(ctxt->input->buf->buffer) -
11767 (ctxt->input->cur - ctxt->input->base);
11768 if (avail < 2)
11769 goto done;
11770 cur = ctxt->input->cur[0];
11771 next = ctxt->input->cur[1];
11772 if ((cur == '<') && (next == '?')) {
11773 if ((!terminate) &&
11774 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11775 ctxt->progressive = XML_PARSER_PI;
11776 goto done;
11777 }
11778 #ifdef DEBUG_PUSH
11779 xmlGenericError(xmlGenericErrorContext,
11780 "PP: Parsing PI\n");
11781 #endif
11782 xmlParsePI(ctxt);
11783 if (ctxt->instate == XML_PARSER_EOF)
11784 goto done;
11785 ctxt->instate = XML_PARSER_PROLOG;
11786 ctxt->progressive = 1;
11787 } else if ((cur == '<') && (next == '!') &&
11788 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11789 if ((!terminate) &&
11790 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11791 ctxt->progressive = XML_PARSER_COMMENT;
11792 goto done;
11793 }
11794 #ifdef DEBUG_PUSH
11795 xmlGenericError(xmlGenericErrorContext,
11796 "PP: Parsing Comment\n");
11797 #endif
11798 xmlParseComment(ctxt);
11799 if (ctxt->instate == XML_PARSER_EOF)
11800 goto done;
11801 ctxt->instate = XML_PARSER_PROLOG;
11802 ctxt->progressive = 1;
11803 } else if ((cur == '<') && (next == '!') &&
11804 (avail < 4)) {
11805 goto done;
11806 } else {
11807 ctxt->instate = XML_PARSER_START_TAG;
11808 if (ctxt->progressive == 0)
11809 ctxt->progressive = XML_PARSER_START_TAG;
11810 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11811 #ifdef DEBUG_PUSH
11812 xmlGenericError(xmlGenericErrorContext,
11813 "PP: entering START_TAG\n");
11814 #endif
11815 }
11816 break;
11817 case XML_PARSER_EPILOG:
11818 SKIP_BLANKS;
11819 if (ctxt->input->buf == NULL)
11820 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11821 else
11822 avail = xmlBufUse(ctxt->input->buf->buffer) -
11823 (ctxt->input->cur - ctxt->input->base);
11824 if (avail < 2)
11825 goto done;
11826 cur = ctxt->input->cur[0];
11827 next = ctxt->input->cur[1];
11828 if ((cur == '<') && (next == '?')) {
11829 if ((!terminate) &&
11830 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11831 ctxt->progressive = XML_PARSER_PI;
11832 goto done;
11833 }
11834 #ifdef DEBUG_PUSH
11835 xmlGenericError(xmlGenericErrorContext,
11836 "PP: Parsing PI\n");
11837 #endif
11838 xmlParsePI(ctxt);
11839 if (ctxt->instate == XML_PARSER_EOF)
11840 goto done;
11841 ctxt->instate = XML_PARSER_EPILOG;
11842 ctxt->progressive = 1;
11843 } else if ((cur == '<') && (next == '!') &&
11844 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11845 if ((!terminate) &&
11846 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11847 ctxt->progressive = XML_PARSER_COMMENT;
11848 goto done;
11849 }
11850 #ifdef DEBUG_PUSH
11851 xmlGenericError(xmlGenericErrorContext,
11852 "PP: Parsing Comment\n");
11853 #endif
11854 xmlParseComment(ctxt);
11855 if (ctxt->instate == XML_PARSER_EOF)
11856 goto done;
11857 ctxt->instate = XML_PARSER_EPILOG;
11858 ctxt->progressive = 1;
11859 } else if ((cur == '<') && (next == '!') &&
11860 (avail < 4)) {
11861 goto done;
11862 } else {
11863 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11864 xmlHaltParser(ctxt);
11865 #ifdef DEBUG_PUSH
11866 xmlGenericError(xmlGenericErrorContext,
11867 "PP: entering EOF\n");
11868 #endif
11869 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11870 ctxt->sax->endDocument(ctxt->userData);
11871 goto done;
11872 }
11873 break;
11874 case XML_PARSER_DTD: {
11875 /*
11876 * Sorry but progressive parsing of the internal subset
11877 * is not expected to be supported. We first check that
11878 * the full content of the internal subset is available and
11879 * the parsing is launched only at that point.
11880 * Internal subset ends up with "']' S? '>'" in an unescaped
11881 * section and not in a ']]>' sequence which are conditional
11882 * sections (whoever argued to keep that crap in XML deserve
11883 * a place in hell !).
11884 */
11885 int base, i;
11886 xmlChar *buf;
11887 xmlChar quote = 0;
11888 size_t use;
11889
11890 base = ctxt->input->cur - ctxt->input->base;
11891 if (base < 0) return(0);
11892 if (ctxt->checkIndex > base)
11893 base = ctxt->checkIndex;
11894 buf = xmlBufContent(ctxt->input->buf->buffer);
11895 use = xmlBufUse(ctxt->input->buf->buffer);
11896 for (;(unsigned int) base < use; base++) {
11897 if (quote != 0) {
11898 if (buf[base] == quote)
11899 quote = 0;
11900 continue;
11901 }
11902 if ((quote == 0) && (buf[base] == '<')) {
11903 int found = 0;
11904 /* special handling of comments */
11905 if (((unsigned int) base + 4 < use) &&
11906 (buf[base + 1] == '!') &&
11907 (buf[base + 2] == '-') &&
11908 (buf[base + 3] == '-')) {
11909 for (;(unsigned int) base + 3 < use; base++) {
11910 if ((buf[base] == '-') &&
11911 (buf[base + 1] == '-') &&
11912 (buf[base + 2] == '>')) {
11913 found = 1;
11914 base += 2;
11915 break;
11916 }
11917 }
11918 if (!found) {
11919 #if 0
11920 fprintf(stderr, "unfinished comment\n");
11921 #endif
11922 break; /* for */
11923 }
11924 continue;
11925 }
11926 }
11927 if (buf[base] == '"') {
11928 quote = '"';
11929 continue;
11930 }
11931 if (buf[base] == '\'') {
11932 quote = '\'';
11933 continue;
11934 }
11935 if (buf[base] == ']') {
11936 #if 0
11937 fprintf(stderr, "%c%c%c%c: ", buf[base],
11938 buf[base + 1], buf[base + 2], buf[base + 3]);
11939 #endif
11940 if ((unsigned int) base +1 >= use)
11941 break;
11942 if (buf[base + 1] == ']') {
11943 /* conditional crap, skip both ']' ! */
11944 base++;
11945 continue;
11946 }
11947 for (i = 1; (unsigned int) base + i < use; i++) {
11948 if (buf[base + i] == '>') {
11949 #if 0
11950 fprintf(stderr, "found\n");
11951 #endif
11952 goto found_end_int_subset;
11953 }
11954 if (!IS_BLANK_CH(buf[base + i])) {
11955 #if 0
11956 fprintf(stderr, "not found\n");
11957 #endif
11958 goto not_end_of_int_subset;
11959 }
11960 }
11961 #if 0
11962 fprintf(stderr, "end of stream\n");
11963 #endif
11964 break;
11965
11966 }
11967 not_end_of_int_subset:
11968 continue; /* for */
11969 }
11970 /*
11971 * We didn't found the end of the Internal subset
11972 */
11973 if (quote == 0)
11974 ctxt->checkIndex = base;
11975 else
11976 ctxt->checkIndex = 0;
11977 #ifdef DEBUG_PUSH
11978 if (next == 0)
11979 xmlGenericError(xmlGenericErrorContext,
11980 "PP: lookup of int subset end filed\n");
11981 #endif
11982 goto done;
11983
11984 found_end_int_subset:
11985 ctxt->checkIndex = 0;
11986 xmlParseInternalSubset(ctxt);
11987 if (ctxt->instate == XML_PARSER_EOF)
11988 goto done;
11989 ctxt->inSubset = 2;
11990 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11991 (ctxt->sax->externalSubset != NULL))
11992 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11993 ctxt->extSubSystem, ctxt->extSubURI);
11994 ctxt->inSubset = 0;
11995 xmlCleanSpecialAttr(ctxt);
11996 if (ctxt->instate == XML_PARSER_EOF)
11997 goto done;
11998 ctxt->instate = XML_PARSER_PROLOG;
11999 ctxt->checkIndex = 0;
12000 #ifdef DEBUG_PUSH
12001 xmlGenericError(xmlGenericErrorContext,
12002 "PP: entering PROLOG\n");
12003 #endif
12004 break;
12005 }
12006 case XML_PARSER_COMMENT:
12007 xmlGenericError(xmlGenericErrorContext,
12008 "PP: internal error, state == COMMENT\n");
12009 ctxt->instate = XML_PARSER_CONTENT;
12010 #ifdef DEBUG_PUSH
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: entering CONTENT\n");
12013 #endif
12014 break;
12015 case XML_PARSER_IGNORE:
12016 xmlGenericError(xmlGenericErrorContext,
12017 "PP: internal error, state == IGNORE");
12018 ctxt->instate = XML_PARSER_DTD;
12019 #ifdef DEBUG_PUSH
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: entering DTD\n");
12022 #endif
12023 break;
12024 case XML_PARSER_PI:
12025 xmlGenericError(xmlGenericErrorContext,
12026 "PP: internal error, state == PI\n");
12027 ctxt->instate = XML_PARSER_CONTENT;
12028 #ifdef DEBUG_PUSH
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: entering CONTENT\n");
12031 #endif
12032 break;
12033 case XML_PARSER_ENTITY_DECL:
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: internal error, state == ENTITY_DECL\n");
12036 ctxt->instate = XML_PARSER_DTD;
12037 #ifdef DEBUG_PUSH
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: entering DTD\n");
12040 #endif
12041 break;
12042 case XML_PARSER_ENTITY_VALUE:
12043 xmlGenericError(xmlGenericErrorContext,
12044 "PP: internal error, state == ENTITY_VALUE\n");
12045 ctxt->instate = XML_PARSER_CONTENT;
12046 #ifdef DEBUG_PUSH
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: entering DTD\n");
12049 #endif
12050 break;
12051 case XML_PARSER_ATTRIBUTE_VALUE:
12052 xmlGenericError(xmlGenericErrorContext,
12053 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12054 ctxt->instate = XML_PARSER_START_TAG;
12055 #ifdef DEBUG_PUSH
12056 xmlGenericError(xmlGenericErrorContext,
12057 "PP: entering START_TAG\n");
12058 #endif
12059 break;
12060 case XML_PARSER_SYSTEM_LITERAL:
12061 xmlGenericError(xmlGenericErrorContext,
12062 "PP: internal error, state == SYSTEM_LITERAL\n");
12063 ctxt->instate = XML_PARSER_START_TAG;
12064 #ifdef DEBUG_PUSH
12065 xmlGenericError(xmlGenericErrorContext,
12066 "PP: entering START_TAG\n");
12067 #endif
12068 break;
12069 case XML_PARSER_PUBLIC_LITERAL:
12070 xmlGenericError(xmlGenericErrorContext,
12071 "PP: internal error, state == PUBLIC_LITERAL\n");
12072 ctxt->instate = XML_PARSER_START_TAG;
12073 #ifdef DEBUG_PUSH
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: entering START_TAG\n");
12076 #endif
12077 break;
12078 }
12079 }
12080 done:
12081 #ifdef DEBUG_PUSH
12082 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12083 #endif
12084 return(ret);
12085 encoding_error:
12086 {
12087 char buffer[150];
12088
12089 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12090 ctxt->input->cur[0], ctxt->input->cur[1],
12091 ctxt->input->cur[2], ctxt->input->cur[3]);
12092 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12093 "Input is not proper UTF-8, indicate encoding !\n%s",
12094 BAD_CAST buffer, NULL);
12095 }
12096 return(0);
12097 }
12098
12099 /**
12100 * xmlParseCheckTransition:
12101 * @ctxt: an XML parser context
12102 * @chunk: a char array
12103 * @size: the size in byte of the chunk
12104 *
12105 * Check depending on the current parser state if the chunk given must be
12106 * processed immediately or one need more data to advance on parsing.
12107 *
12108 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12109 */
12110 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12111 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12112 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12113 return(-1);
12114 if (ctxt->instate == XML_PARSER_START_TAG) {
12115 if (memchr(chunk, '>', size) != NULL)
12116 return(1);
12117 return(0);
12118 }
12119 if (ctxt->progressive == XML_PARSER_COMMENT) {
12120 if (memchr(chunk, '>', size) != NULL)
12121 return(1);
12122 return(0);
12123 }
12124 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12125 if (memchr(chunk, '>', size) != NULL)
12126 return(1);
12127 return(0);
12128 }
12129 if (ctxt->progressive == XML_PARSER_PI) {
12130 if (memchr(chunk, '>', size) != NULL)
12131 return(1);
12132 return(0);
12133 }
12134 if (ctxt->instate == XML_PARSER_END_TAG) {
12135 if (memchr(chunk, '>', size) != NULL)
12136 return(1);
12137 return(0);
12138 }
12139 if ((ctxt->progressive == XML_PARSER_DTD) ||
12140 (ctxt->instate == XML_PARSER_DTD)) {
12141 if (memchr(chunk, '>', size) != NULL)
12142 return(1);
12143 return(0);
12144 }
12145 return(1);
12146 }
12147
12148 /**
12149 * xmlParseChunk:
12150 * @ctxt: an XML parser context
12151 * @chunk: an char array
12152 * @size: the size in byte of the chunk
12153 * @terminate: last chunk indicator
12154 *
12155 * Parse a Chunk of memory
12156 *
12157 * Returns zero if no error, the xmlParserErrors otherwise.
12158 */
12159 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12160 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12161 int terminate) {
12162 int end_in_lf = 0;
12163 int remain = 0;
12164 size_t old_avail = 0;
12165 size_t avail = 0;
12166
12167 if (ctxt == NULL)
12168 return(XML_ERR_INTERNAL_ERROR);
12169 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12170 return(ctxt->errNo);
12171 if (ctxt->instate == XML_PARSER_EOF)
12172 return(-1);
12173 if (ctxt->instate == XML_PARSER_START)
12174 xmlDetectSAX2(ctxt);
12175 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12176 (chunk[size - 1] == '\r')) {
12177 end_in_lf = 1;
12178 size--;
12179 }
12180
12181 xmldecl_done:
12182
12183 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12184 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12185 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12186 size_t cur = ctxt->input->cur - ctxt->input->base;
12187 int res;
12188
12189 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12190 /*
12191 * Specific handling if we autodetected an encoding, we should not
12192 * push more than the first line ... which depend on the encoding
12193 * And only push the rest once the final encoding was detected
12194 */
12195 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12196 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12197 unsigned int len = 45;
12198
12199 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12200 BAD_CAST "UTF-16")) ||
12201 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12202 BAD_CAST "UTF16")))
12203 len = 90;
12204 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12205 BAD_CAST "UCS-4")) ||
12206 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12207 BAD_CAST "UCS4")))
12208 len = 180;
12209
12210 if (ctxt->input->buf->rawconsumed < len)
12211 len -= ctxt->input->buf->rawconsumed;
12212
12213 /*
12214 * Change size for reading the initial declaration only
12215 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12216 * will blindly copy extra bytes from memory.
12217 */
12218 if ((unsigned int) size > len) {
12219 remain = size - len;
12220 size = len;
12221 } else {
12222 remain = 0;
12223 }
12224 }
12225 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12226 if (res < 0) {
12227 ctxt->errNo = XML_PARSER_EOF;
12228 xmlHaltParser(ctxt);
12229 return (XML_PARSER_EOF);
12230 }
12231 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12232 #ifdef DEBUG_PUSH
12233 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12234 #endif
12235
12236 } else if (ctxt->instate != XML_PARSER_EOF) {
12237 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12238 xmlParserInputBufferPtr in = ctxt->input->buf;
12239 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12240 (in->raw != NULL)) {
12241 int nbchars;
12242 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12243 size_t current = ctxt->input->cur - ctxt->input->base;
12244
12245 nbchars = xmlCharEncInput(in, terminate);
12246 if (nbchars < 0) {
12247 /* TODO 2.6.0 */
12248 xmlGenericError(xmlGenericErrorContext,
12249 "xmlParseChunk: encoder error\n");
12250 xmlHaltParser(ctxt);
12251 return(XML_ERR_INVALID_ENCODING);
12252 }
12253 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12254 }
12255 }
12256 }
12257 if (remain != 0) {
12258 xmlParseTryOrFinish(ctxt, 0);
12259 } else {
12260 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12261 avail = xmlBufUse(ctxt->input->buf->buffer);
12262 /*
12263 * Depending on the current state it may not be such
12264 * a good idea to try parsing if there is nothing in the chunk
12265 * which would be worth doing a parser state transition and we
12266 * need to wait for more data
12267 */
12268 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12269 (old_avail == 0) || (avail == 0) ||
12270 (xmlParseCheckTransition(ctxt,
12271 (const char *)&ctxt->input->base[old_avail],
12272 avail - old_avail)))
12273 xmlParseTryOrFinish(ctxt, terminate);
12274 }
12275 if (ctxt->instate == XML_PARSER_EOF)
12276 return(ctxt->errNo);
12277
12278 if ((ctxt->input != NULL) &&
12279 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12280 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12281 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12282 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12283 xmlHaltParser(ctxt);
12284 }
12285 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12286 return(ctxt->errNo);
12287
12288 if (remain != 0) {
12289 chunk += size;
12290 size = remain;
12291 remain = 0;
12292 goto xmldecl_done;
12293 }
12294 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12295 (ctxt->input->buf != NULL)) {
12296 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12297 ctxt->input);
12298 size_t current = ctxt->input->cur - ctxt->input->base;
12299
12300 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12301
12302 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12303 base, current);
12304 }
12305 if (terminate) {
12306 /*
12307 * Check for termination
12308 */
12309 int cur_avail = 0;
12310
12311 if (ctxt->input != NULL) {
12312 if (ctxt->input->buf == NULL)
12313 cur_avail = ctxt->input->length -
12314 (ctxt->input->cur - ctxt->input->base);
12315 else
12316 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12317 (ctxt->input->cur - ctxt->input->base);
12318 }
12319
12320 if ((ctxt->instate != XML_PARSER_EOF) &&
12321 (ctxt->instate != XML_PARSER_EPILOG)) {
12322 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12323 }
12324 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12326 }
12327 if (ctxt->instate != XML_PARSER_EOF) {
12328 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12329 ctxt->sax->endDocument(ctxt->userData);
12330 }
12331 ctxt->instate = XML_PARSER_EOF;
12332 }
12333 if (ctxt->wellFormed == 0)
12334 return((xmlParserErrors) ctxt->errNo);
12335 else
12336 return(0);
12337 }
12338
12339 /************************************************************************
12340 * *
12341 * I/O front end functions to the parser *
12342 * *
12343 ************************************************************************/
12344
12345 /**
12346 * xmlCreatePushParserCtxt:
12347 * @sax: a SAX handler
12348 * @user_data: The user data returned on SAX callbacks
12349 * @chunk: a pointer to an array of chars
12350 * @size: number of chars in the array
12351 * @filename: an optional file name or URI
12352 *
12353 * Create a parser context for using the XML parser in push mode.
12354 * If @buffer and @size are non-NULL, the data is used to detect
12355 * the encoding. The remaining characters will be parsed so they
12356 * don't need to be fed in again through xmlParseChunk.
12357 * To allow content encoding detection, @size should be >= 4
12358 * The value of @filename is used for fetching external entities
12359 * and error/warning reports.
12360 *
12361 * Returns the new parser context or NULL
12362 */
12363
12364 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12365 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12366 const char *chunk, int size, const char *filename) {
12367 xmlParserCtxtPtr ctxt;
12368 xmlParserInputPtr inputStream;
12369 xmlParserInputBufferPtr buf;
12370 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12371
12372 /*
12373 * plug some encoding conversion routines
12374 */
12375 if ((chunk != NULL) && (size >= 4))
12376 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12377
12378 buf = xmlAllocParserInputBuffer(enc);
12379 if (buf == NULL) return(NULL);
12380
12381 ctxt = xmlNewParserCtxt();
12382 if (ctxt == NULL) {
12383 xmlErrMemory(NULL, "creating parser: out of memory\n");
12384 xmlFreeParserInputBuffer(buf);
12385 return(NULL);
12386 }
12387 ctxt->dictNames = 1;
12388 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12389 if (ctxt->pushTab == NULL) {
12390 xmlErrMemory(ctxt, NULL);
12391 xmlFreeParserInputBuffer(buf);
12392 xmlFreeParserCtxt(ctxt);
12393 return(NULL);
12394 }
12395 if (sax != NULL) {
12396 #ifdef LIBXML_SAX1_ENABLED
12397 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12398 #endif /* LIBXML_SAX1_ENABLED */
12399 xmlFree(ctxt->sax);
12400 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12401 if (ctxt->sax == NULL) {
12402 xmlErrMemory(ctxt, NULL);
12403 xmlFreeParserInputBuffer(buf);
12404 xmlFreeParserCtxt(ctxt);
12405 return(NULL);
12406 }
12407 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12408 if (sax->initialized == XML_SAX2_MAGIC)
12409 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12410 else
12411 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12412 if (user_data != NULL)
12413 ctxt->userData = user_data;
12414 }
12415 if (filename == NULL) {
12416 ctxt->directory = NULL;
12417 } else {
12418 ctxt->directory = xmlParserGetDirectory(filename);
12419 }
12420
12421 inputStream = xmlNewInputStream(ctxt);
12422 if (inputStream == NULL) {
12423 xmlFreeParserCtxt(ctxt);
12424 xmlFreeParserInputBuffer(buf);
12425 return(NULL);
12426 }
12427
12428 if (filename == NULL)
12429 inputStream->filename = NULL;
12430 else {
12431 inputStream->filename = (char *)
12432 xmlCanonicPath((const xmlChar *) filename);
12433 if (inputStream->filename == NULL) {
12434 xmlFreeParserCtxt(ctxt);
12435 xmlFreeParserInputBuffer(buf);
12436 return(NULL);
12437 }
12438 }
12439 inputStream->buf = buf;
12440 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12441 inputPush(ctxt, inputStream);
12442
12443 /*
12444 * If the caller didn't provide an initial 'chunk' for determining
12445 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12446 * that it can be automatically determined later
12447 */
12448 if ((size == 0) || (chunk == NULL)) {
12449 ctxt->charset = XML_CHAR_ENCODING_NONE;
12450 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12451 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12452 size_t cur = ctxt->input->cur - ctxt->input->base;
12453
12454 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12455
12456 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12457 #ifdef DEBUG_PUSH
12458 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12459 #endif
12460 }
12461
12462 if (enc != XML_CHAR_ENCODING_NONE) {
12463 xmlSwitchEncoding(ctxt, enc);
12464 }
12465
12466 return(ctxt);
12467 }
12468 #endif /* LIBXML_PUSH_ENABLED */
12469
12470 /**
12471 * xmlHaltParser:
12472 * @ctxt: an XML parser context
12473 *
12474 * Blocks further parser processing don't override error
12475 * for internal use
12476 */
12477 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12478 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12479 if (ctxt == NULL)
12480 return;
12481 ctxt->instate = XML_PARSER_EOF;
12482 ctxt->disableSAX = 1;
12483 while (ctxt->inputNr > 1)
12484 xmlFreeInputStream(inputPop(ctxt));
12485 if (ctxt->input != NULL) {
12486 /*
12487 * in case there was a specific allocation deallocate before
12488 * overriding base
12489 */
12490 if (ctxt->input->free != NULL) {
12491 ctxt->input->free((xmlChar *) ctxt->input->base);
12492 ctxt->input->free = NULL;
12493 }
12494 if (ctxt->input->buf != NULL) {
12495 xmlFreeParserInputBuffer(ctxt->input->buf);
12496 ctxt->input->buf = NULL;
12497 }
12498 ctxt->input->cur = BAD_CAST"";
12499 ctxt->input->length = 0;
12500 ctxt->input->base = ctxt->input->cur;
12501 ctxt->input->end = ctxt->input->cur;
12502 }
12503 }
12504
12505 /**
12506 * xmlStopParser:
12507 * @ctxt: an XML parser context
12508 *
12509 * Blocks further parser processing
12510 */
12511 void
xmlStopParser(xmlParserCtxtPtr ctxt)12512 xmlStopParser(xmlParserCtxtPtr ctxt) {
12513 if (ctxt == NULL)
12514 return;
12515 xmlHaltParser(ctxt);
12516 ctxt->errNo = XML_ERR_USER_STOP;
12517 }
12518
12519 /**
12520 * xmlCreateIOParserCtxt:
12521 * @sax: a SAX handler
12522 * @user_data: The user data returned on SAX callbacks
12523 * @ioread: an I/O read function
12524 * @ioclose: an I/O close function
12525 * @ioctx: an I/O handler
12526 * @enc: the charset encoding if known
12527 *
12528 * Create a parser context for using the XML parser with an existing
12529 * I/O stream
12530 *
12531 * Returns the new parser context or NULL
12532 */
12533 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12534 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12535 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12536 void *ioctx, xmlCharEncoding enc) {
12537 xmlParserCtxtPtr ctxt;
12538 xmlParserInputPtr inputStream;
12539 xmlParserInputBufferPtr buf;
12540
12541 if (ioread == NULL) return(NULL);
12542
12543 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12544 if (buf == NULL) {
12545 if (ioclose != NULL)
12546 ioclose(ioctx);
12547 return (NULL);
12548 }
12549
12550 ctxt = xmlNewParserCtxt();
12551 if (ctxt == NULL) {
12552 xmlFreeParserInputBuffer(buf);
12553 return(NULL);
12554 }
12555 if (sax != NULL) {
12556 #ifdef LIBXML_SAX1_ENABLED
12557 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12558 #endif /* LIBXML_SAX1_ENABLED */
12559 xmlFree(ctxt->sax);
12560 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12561 if (ctxt->sax == NULL) {
12562 xmlErrMemory(ctxt, NULL);
12563 xmlFreeParserCtxt(ctxt);
12564 return(NULL);
12565 }
12566 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12567 if (sax->initialized == XML_SAX2_MAGIC)
12568 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12569 else
12570 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12571 if (user_data != NULL)
12572 ctxt->userData = user_data;
12573 }
12574
12575 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12576 if (inputStream == NULL) {
12577 xmlFreeParserCtxt(ctxt);
12578 return(NULL);
12579 }
12580 inputPush(ctxt, inputStream);
12581
12582 return(ctxt);
12583 }
12584
12585 #ifdef LIBXML_VALID_ENABLED
12586 /************************************************************************
12587 * *
12588 * Front ends when parsing a DTD *
12589 * *
12590 ************************************************************************/
12591
12592 /**
12593 * xmlIOParseDTD:
12594 * @sax: the SAX handler block or NULL
12595 * @input: an Input Buffer
12596 * @enc: the charset encoding if known
12597 *
12598 * Load and parse a DTD
12599 *
12600 * Returns the resulting xmlDtdPtr or NULL in case of error.
12601 * @input will be freed by the function in any case.
12602 */
12603
12604 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12605 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12606 xmlCharEncoding enc) {
12607 xmlDtdPtr ret = NULL;
12608 xmlParserCtxtPtr ctxt;
12609 xmlParserInputPtr pinput = NULL;
12610 xmlChar start[4];
12611
12612 if (input == NULL)
12613 return(NULL);
12614
12615 ctxt = xmlNewParserCtxt();
12616 if (ctxt == NULL) {
12617 xmlFreeParserInputBuffer(input);
12618 return(NULL);
12619 }
12620
12621 /* We are loading a DTD */
12622 ctxt->options |= XML_PARSE_DTDLOAD;
12623
12624 /*
12625 * Set-up the SAX context
12626 */
12627 if (sax != NULL) {
12628 if (ctxt->sax != NULL)
12629 xmlFree(ctxt->sax);
12630 ctxt->sax = sax;
12631 ctxt->userData = ctxt;
12632 }
12633 xmlDetectSAX2(ctxt);
12634
12635 /*
12636 * generate a parser input from the I/O handler
12637 */
12638
12639 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12640 if (pinput == NULL) {
12641 if (sax != NULL) ctxt->sax = NULL;
12642 xmlFreeParserInputBuffer(input);
12643 xmlFreeParserCtxt(ctxt);
12644 return(NULL);
12645 }
12646
12647 /*
12648 * plug some encoding conversion routines here.
12649 */
12650 if (xmlPushInput(ctxt, pinput) < 0) {
12651 if (sax != NULL) ctxt->sax = NULL;
12652 xmlFreeParserCtxt(ctxt);
12653 return(NULL);
12654 }
12655 if (enc != XML_CHAR_ENCODING_NONE) {
12656 xmlSwitchEncoding(ctxt, enc);
12657 }
12658
12659 pinput->filename = NULL;
12660 pinput->line = 1;
12661 pinput->col = 1;
12662 pinput->base = ctxt->input->cur;
12663 pinput->cur = ctxt->input->cur;
12664 pinput->free = NULL;
12665
12666 /*
12667 * let's parse that entity knowing it's an external subset.
12668 */
12669 ctxt->inSubset = 2;
12670 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12671 if (ctxt->myDoc == NULL) {
12672 xmlErrMemory(ctxt, "New Doc failed");
12673 return(NULL);
12674 }
12675 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12676 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12677 BAD_CAST "none", BAD_CAST "none");
12678
12679 if ((enc == XML_CHAR_ENCODING_NONE) &&
12680 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12681 /*
12682 * Get the 4 first bytes and decode the charset
12683 * if enc != XML_CHAR_ENCODING_NONE
12684 * plug some encoding conversion routines.
12685 */
12686 start[0] = RAW;
12687 start[1] = NXT(1);
12688 start[2] = NXT(2);
12689 start[3] = NXT(3);
12690 enc = xmlDetectCharEncoding(start, 4);
12691 if (enc != XML_CHAR_ENCODING_NONE) {
12692 xmlSwitchEncoding(ctxt, enc);
12693 }
12694 }
12695
12696 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12697
12698 if (ctxt->myDoc != NULL) {
12699 if (ctxt->wellFormed) {
12700 ret = ctxt->myDoc->extSubset;
12701 ctxt->myDoc->extSubset = NULL;
12702 if (ret != NULL) {
12703 xmlNodePtr tmp;
12704
12705 ret->doc = NULL;
12706 tmp = ret->children;
12707 while (tmp != NULL) {
12708 tmp->doc = NULL;
12709 tmp = tmp->next;
12710 }
12711 }
12712 } else {
12713 ret = NULL;
12714 }
12715 xmlFreeDoc(ctxt->myDoc);
12716 ctxt->myDoc = NULL;
12717 }
12718 if (sax != NULL) ctxt->sax = NULL;
12719 xmlFreeParserCtxt(ctxt);
12720
12721 return(ret);
12722 }
12723
12724 /**
12725 * xmlSAXParseDTD:
12726 * @sax: the SAX handler block
12727 * @ExternalID: a NAME* containing the External ID of the DTD
12728 * @SystemID: a NAME* containing the URL to the DTD
12729 *
12730 * Load and parse an external subset.
12731 *
12732 * Returns the resulting xmlDtdPtr or NULL in case of error.
12733 */
12734
12735 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12736 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12737 const xmlChar *SystemID) {
12738 xmlDtdPtr ret = NULL;
12739 xmlParserCtxtPtr ctxt;
12740 xmlParserInputPtr input = NULL;
12741 xmlCharEncoding enc;
12742 xmlChar* systemIdCanonic;
12743
12744 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12745
12746 ctxt = xmlNewParserCtxt();
12747 if (ctxt == NULL) {
12748 return(NULL);
12749 }
12750
12751 /* We are loading a DTD */
12752 ctxt->options |= XML_PARSE_DTDLOAD;
12753
12754 /*
12755 * Set-up the SAX context
12756 */
12757 if (sax != NULL) {
12758 if (ctxt->sax != NULL)
12759 xmlFree(ctxt->sax);
12760 ctxt->sax = sax;
12761 ctxt->userData = ctxt;
12762 }
12763
12764 /*
12765 * Canonicalise the system ID
12766 */
12767 systemIdCanonic = xmlCanonicPath(SystemID);
12768 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12769 xmlFreeParserCtxt(ctxt);
12770 return(NULL);
12771 }
12772
12773 /*
12774 * Ask the Entity resolver to load the damn thing
12775 */
12776
12777 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12778 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12779 systemIdCanonic);
12780 if (input == NULL) {
12781 if (sax != NULL) ctxt->sax = NULL;
12782 xmlFreeParserCtxt(ctxt);
12783 if (systemIdCanonic != NULL)
12784 xmlFree(systemIdCanonic);
12785 return(NULL);
12786 }
12787
12788 /*
12789 * plug some encoding conversion routines here.
12790 */
12791 if (xmlPushInput(ctxt, input) < 0) {
12792 if (sax != NULL) ctxt->sax = NULL;
12793 xmlFreeParserCtxt(ctxt);
12794 if (systemIdCanonic != NULL)
12795 xmlFree(systemIdCanonic);
12796 return(NULL);
12797 }
12798 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12799 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12800 xmlSwitchEncoding(ctxt, enc);
12801 }
12802
12803 if (input->filename == NULL)
12804 input->filename = (char *) systemIdCanonic;
12805 else
12806 xmlFree(systemIdCanonic);
12807 input->line = 1;
12808 input->col = 1;
12809 input->base = ctxt->input->cur;
12810 input->cur = ctxt->input->cur;
12811 input->free = NULL;
12812
12813 /*
12814 * let's parse that entity knowing it's an external subset.
12815 */
12816 ctxt->inSubset = 2;
12817 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12818 if (ctxt->myDoc == NULL) {
12819 xmlErrMemory(ctxt, "New Doc failed");
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
12822 return(NULL);
12823 }
12824 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12825 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12826 ExternalID, SystemID);
12827 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12828
12829 if (ctxt->myDoc != NULL) {
12830 if (ctxt->wellFormed) {
12831 ret = ctxt->myDoc->extSubset;
12832 ctxt->myDoc->extSubset = NULL;
12833 if (ret != NULL) {
12834 xmlNodePtr tmp;
12835
12836 ret->doc = NULL;
12837 tmp = ret->children;
12838 while (tmp != NULL) {
12839 tmp->doc = NULL;
12840 tmp = tmp->next;
12841 }
12842 }
12843 } else {
12844 ret = NULL;
12845 }
12846 xmlFreeDoc(ctxt->myDoc);
12847 ctxt->myDoc = NULL;
12848 }
12849 if (sax != NULL) ctxt->sax = NULL;
12850 xmlFreeParserCtxt(ctxt);
12851
12852 return(ret);
12853 }
12854
12855
12856 /**
12857 * xmlParseDTD:
12858 * @ExternalID: a NAME* containing the External ID of the DTD
12859 * @SystemID: a NAME* containing the URL to the DTD
12860 *
12861 * Load and parse an external subset.
12862 *
12863 * Returns the resulting xmlDtdPtr or NULL in case of error.
12864 */
12865
12866 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12867 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12868 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12869 }
12870 #endif /* LIBXML_VALID_ENABLED */
12871
12872 /************************************************************************
12873 * *
12874 * Front ends when parsing an Entity *
12875 * *
12876 ************************************************************************/
12877
12878 /**
12879 * xmlParseCtxtExternalEntity:
12880 * @ctx: the existing parsing context
12881 * @URL: the URL for the entity to load
12882 * @ID: the System ID for the entity to load
12883 * @lst: the return value for the set of parsed nodes
12884 *
12885 * Parse an external general entity within an existing parsing context
12886 * An external general parsed entity is well-formed if it matches the
12887 * production labeled extParsedEnt.
12888 *
12889 * [78] extParsedEnt ::= TextDecl? content
12890 *
12891 * Returns 0 if the entity is well formed, -1 in case of args problem and
12892 * the parser error code otherwise
12893 */
12894
12895 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12896 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12897 const xmlChar *ID, xmlNodePtr *lst) {
12898 xmlParserCtxtPtr ctxt;
12899 xmlDocPtr newDoc;
12900 xmlNodePtr newRoot;
12901 xmlSAXHandlerPtr oldsax = NULL;
12902 int ret = 0;
12903 xmlChar start[4];
12904 xmlCharEncoding enc;
12905
12906 if (ctx == NULL) return(-1);
12907
12908 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12909 (ctx->depth > 1024)) {
12910 return(XML_ERR_ENTITY_LOOP);
12911 }
12912
12913 if (lst != NULL)
12914 *lst = NULL;
12915 if ((URL == NULL) && (ID == NULL))
12916 return(-1);
12917 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12918 return(-1);
12919
12920 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12921 if (ctxt == NULL) {
12922 return(-1);
12923 }
12924
12925 oldsax = ctxt->sax;
12926 ctxt->sax = ctx->sax;
12927 xmlDetectSAX2(ctxt);
12928 newDoc = xmlNewDoc(BAD_CAST "1.0");
12929 if (newDoc == NULL) {
12930 xmlFreeParserCtxt(ctxt);
12931 return(-1);
12932 }
12933 newDoc->properties = XML_DOC_INTERNAL;
12934 if (ctx->myDoc->dict) {
12935 newDoc->dict = ctx->myDoc->dict;
12936 xmlDictReference(newDoc->dict);
12937 }
12938 if (ctx->myDoc != NULL) {
12939 newDoc->intSubset = ctx->myDoc->intSubset;
12940 newDoc->extSubset = ctx->myDoc->extSubset;
12941 }
12942 if (ctx->myDoc->URL != NULL) {
12943 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12944 }
12945 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12946 if (newRoot == NULL) {
12947 ctxt->sax = oldsax;
12948 xmlFreeParserCtxt(ctxt);
12949 newDoc->intSubset = NULL;
12950 newDoc->extSubset = NULL;
12951 xmlFreeDoc(newDoc);
12952 return(-1);
12953 }
12954 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12955 nodePush(ctxt, newDoc->children);
12956 if (ctx->myDoc == NULL) {
12957 ctxt->myDoc = newDoc;
12958 } else {
12959 ctxt->myDoc = ctx->myDoc;
12960 newDoc->children->doc = ctx->myDoc;
12961 }
12962
12963 /*
12964 * Get the 4 first bytes and decode the charset
12965 * if enc != XML_CHAR_ENCODING_NONE
12966 * plug some encoding conversion routines.
12967 */
12968 GROW
12969 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12970 start[0] = RAW;
12971 start[1] = NXT(1);
12972 start[2] = NXT(2);
12973 start[3] = NXT(3);
12974 enc = xmlDetectCharEncoding(start, 4);
12975 if (enc != XML_CHAR_ENCODING_NONE) {
12976 xmlSwitchEncoding(ctxt, enc);
12977 }
12978 }
12979
12980 /*
12981 * Parse a possible text declaration first
12982 */
12983 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12984 xmlParseTextDecl(ctxt);
12985 /*
12986 * An XML-1.0 document can't reference an entity not XML-1.0
12987 */
12988 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12989 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12990 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12991 "Version mismatch between document and entity\n");
12992 }
12993 }
12994
12995 /*
12996 * If the user provided its own SAX callbacks then reuse the
12997 * useData callback field, otherwise the expected setup in a
12998 * DOM builder is to have userData == ctxt
12999 */
13000 if (ctx->userData == ctx)
13001 ctxt->userData = ctxt;
13002 else
13003 ctxt->userData = ctx->userData;
13004
13005 /*
13006 * Doing validity checking on chunk doesn't make sense
13007 */
13008 ctxt->instate = XML_PARSER_CONTENT;
13009 ctxt->validate = ctx->validate;
13010 ctxt->valid = ctx->valid;
13011 ctxt->loadsubset = ctx->loadsubset;
13012 ctxt->depth = ctx->depth + 1;
13013 ctxt->replaceEntities = ctx->replaceEntities;
13014 if (ctxt->validate) {
13015 ctxt->vctxt.error = ctx->vctxt.error;
13016 ctxt->vctxt.warning = ctx->vctxt.warning;
13017 } else {
13018 ctxt->vctxt.error = NULL;
13019 ctxt->vctxt.warning = NULL;
13020 }
13021 ctxt->vctxt.nodeTab = NULL;
13022 ctxt->vctxt.nodeNr = 0;
13023 ctxt->vctxt.nodeMax = 0;
13024 ctxt->vctxt.node = NULL;
13025 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13026 ctxt->dict = ctx->dict;
13027 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13028 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13029 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13030 ctxt->dictNames = ctx->dictNames;
13031 ctxt->attsDefault = ctx->attsDefault;
13032 ctxt->attsSpecial = ctx->attsSpecial;
13033 ctxt->linenumbers = ctx->linenumbers;
13034
13035 xmlParseContent(ctxt);
13036
13037 ctx->validate = ctxt->validate;
13038 ctx->valid = ctxt->valid;
13039 if ((RAW == '<') && (NXT(1) == '/')) {
13040 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13041 } else if (RAW != 0) {
13042 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13043 }
13044 if (ctxt->node != newDoc->children) {
13045 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13046 }
13047
13048 if (!ctxt->wellFormed) {
13049 if (ctxt->errNo == 0)
13050 ret = 1;
13051 else
13052 ret = ctxt->errNo;
13053 } else {
13054 if (lst != NULL) {
13055 xmlNodePtr cur;
13056
13057 /*
13058 * Return the newly created nodeset after unlinking it from
13059 * they pseudo parent.
13060 */
13061 cur = newDoc->children->children;
13062 *lst = cur;
13063 while (cur != NULL) {
13064 cur->parent = NULL;
13065 cur = cur->next;
13066 }
13067 newDoc->children->children = NULL;
13068 }
13069 ret = 0;
13070 }
13071 ctxt->sax = oldsax;
13072 ctxt->dict = NULL;
13073 ctxt->attsDefault = NULL;
13074 ctxt->attsSpecial = NULL;
13075 xmlFreeParserCtxt(ctxt);
13076 newDoc->intSubset = NULL;
13077 newDoc->extSubset = NULL;
13078 xmlFreeDoc(newDoc);
13079
13080 return(ret);
13081 }
13082
13083 /**
13084 * xmlParseExternalEntityPrivate:
13085 * @doc: the document the chunk pertains to
13086 * @oldctxt: the previous parser context if available
13087 * @sax: the SAX handler bloc (possibly NULL)
13088 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13089 * @depth: Used for loop detection, use 0
13090 * @URL: the URL for the entity to load
13091 * @ID: the System ID for the entity to load
13092 * @list: the return value for the set of parsed nodes
13093 *
13094 * Private version of xmlParseExternalEntity()
13095 *
13096 * Returns 0 if the entity is well formed, -1 in case of args problem and
13097 * the parser error code otherwise
13098 */
13099
13100 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13101 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13102 xmlSAXHandlerPtr sax,
13103 void *user_data, int depth, const xmlChar *URL,
13104 const xmlChar *ID, xmlNodePtr *list) {
13105 xmlParserCtxtPtr ctxt;
13106 xmlDocPtr newDoc;
13107 xmlNodePtr newRoot;
13108 xmlSAXHandlerPtr oldsax = NULL;
13109 xmlParserErrors ret = XML_ERR_OK;
13110 xmlChar start[4];
13111 xmlCharEncoding enc;
13112
13113 if (((depth > 40) &&
13114 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13115 (depth > 1024)) {
13116 return(XML_ERR_ENTITY_LOOP);
13117 }
13118
13119 if (list != NULL)
13120 *list = NULL;
13121 if ((URL == NULL) && (ID == NULL))
13122 return(XML_ERR_INTERNAL_ERROR);
13123 if (doc == NULL)
13124 return(XML_ERR_INTERNAL_ERROR);
13125
13126
13127 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13128 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13129 ctxt->userData = ctxt;
13130 if (oldctxt != NULL) {
13131 ctxt->_private = oldctxt->_private;
13132 ctxt->loadsubset = oldctxt->loadsubset;
13133 ctxt->validate = oldctxt->validate;
13134 ctxt->external = oldctxt->external;
13135 ctxt->record_info = oldctxt->record_info;
13136 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13137 ctxt->node_seq.length = oldctxt->node_seq.length;
13138 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13139 } else {
13140 /*
13141 * Doing validity checking on chunk without context
13142 * doesn't make sense
13143 */
13144 ctxt->_private = NULL;
13145 ctxt->validate = 0;
13146 ctxt->external = 2;
13147 ctxt->loadsubset = 0;
13148 }
13149 if (sax != NULL) {
13150 oldsax = ctxt->sax;
13151 ctxt->sax = sax;
13152 if (user_data != NULL)
13153 ctxt->userData = user_data;
13154 }
13155 xmlDetectSAX2(ctxt);
13156 newDoc = xmlNewDoc(BAD_CAST "1.0");
13157 if (newDoc == NULL) {
13158 ctxt->node_seq.maximum = 0;
13159 ctxt->node_seq.length = 0;
13160 ctxt->node_seq.buffer = NULL;
13161 xmlFreeParserCtxt(ctxt);
13162 return(XML_ERR_INTERNAL_ERROR);
13163 }
13164 newDoc->properties = XML_DOC_INTERNAL;
13165 newDoc->intSubset = doc->intSubset;
13166 newDoc->extSubset = doc->extSubset;
13167 newDoc->dict = doc->dict;
13168 xmlDictReference(newDoc->dict);
13169
13170 if (doc->URL != NULL) {
13171 newDoc->URL = xmlStrdup(doc->URL);
13172 }
13173 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13174 if (newRoot == NULL) {
13175 if (sax != NULL)
13176 ctxt->sax = oldsax;
13177 ctxt->node_seq.maximum = 0;
13178 ctxt->node_seq.length = 0;
13179 ctxt->node_seq.buffer = NULL;
13180 xmlFreeParserCtxt(ctxt);
13181 newDoc->intSubset = NULL;
13182 newDoc->extSubset = NULL;
13183 xmlFreeDoc(newDoc);
13184 return(XML_ERR_INTERNAL_ERROR);
13185 }
13186 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13187 nodePush(ctxt, newDoc->children);
13188 ctxt->myDoc = doc;
13189 newRoot->doc = doc;
13190
13191 /*
13192 * Get the 4 first bytes and decode the charset
13193 * if enc != XML_CHAR_ENCODING_NONE
13194 * plug some encoding conversion routines.
13195 */
13196 GROW;
13197 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13198 start[0] = RAW;
13199 start[1] = NXT(1);
13200 start[2] = NXT(2);
13201 start[3] = NXT(3);
13202 enc = xmlDetectCharEncoding(start, 4);
13203 if (enc != XML_CHAR_ENCODING_NONE) {
13204 xmlSwitchEncoding(ctxt, enc);
13205 }
13206 }
13207
13208 /*
13209 * Parse a possible text declaration first
13210 */
13211 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13212 xmlParseTextDecl(ctxt);
13213 }
13214
13215 ctxt->instate = XML_PARSER_CONTENT;
13216 ctxt->depth = depth;
13217
13218 xmlParseContent(ctxt);
13219
13220 if ((RAW == '<') && (NXT(1) == '/')) {
13221 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13222 } else if (RAW != 0) {
13223 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13224 }
13225 if (ctxt->node != newDoc->children) {
13226 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13227 }
13228
13229 if (!ctxt->wellFormed) {
13230 if (ctxt->errNo == 0)
13231 ret = XML_ERR_INTERNAL_ERROR;
13232 else
13233 ret = (xmlParserErrors)ctxt->errNo;
13234 } else {
13235 if (list != NULL) {
13236 xmlNodePtr cur;
13237
13238 /*
13239 * Return the newly created nodeset after unlinking it from
13240 * they pseudo parent.
13241 */
13242 cur = newDoc->children->children;
13243 *list = cur;
13244 while (cur != NULL) {
13245 cur->parent = NULL;
13246 cur = cur->next;
13247 }
13248 newDoc->children->children = NULL;
13249 }
13250 ret = XML_ERR_OK;
13251 }
13252
13253 /*
13254 * Record in the parent context the number of entities replacement
13255 * done when parsing that reference.
13256 */
13257 if (oldctxt != NULL)
13258 oldctxt->nbentities += ctxt->nbentities;
13259
13260 /*
13261 * Also record the size of the entity parsed
13262 */
13263 if (ctxt->input != NULL && oldctxt != NULL) {
13264 oldctxt->sizeentities += ctxt->input->consumed;
13265 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13266 }
13267 /*
13268 * And record the last error if any
13269 */
13270 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13271 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13272
13273 if (sax != NULL)
13274 ctxt->sax = oldsax;
13275 if (oldctxt != NULL) {
13276 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13277 oldctxt->node_seq.length = ctxt->node_seq.length;
13278 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13279 }
13280 ctxt->node_seq.maximum = 0;
13281 ctxt->node_seq.length = 0;
13282 ctxt->node_seq.buffer = NULL;
13283 xmlFreeParserCtxt(ctxt);
13284 newDoc->intSubset = NULL;
13285 newDoc->extSubset = NULL;
13286 xmlFreeDoc(newDoc);
13287
13288 return(ret);
13289 }
13290
13291 #ifdef LIBXML_SAX1_ENABLED
13292 /**
13293 * xmlParseExternalEntity:
13294 * @doc: the document the chunk pertains to
13295 * @sax: the SAX handler bloc (possibly NULL)
13296 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13297 * @depth: Used for loop detection, use 0
13298 * @URL: the URL for the entity to load
13299 * @ID: the System ID for the entity to load
13300 * @lst: the return value for the set of parsed nodes
13301 *
13302 * Parse an external general entity
13303 * An external general parsed entity is well-formed if it matches the
13304 * production labeled extParsedEnt.
13305 *
13306 * [78] extParsedEnt ::= TextDecl? content
13307 *
13308 * Returns 0 if the entity is well formed, -1 in case of args problem and
13309 * the parser error code otherwise
13310 */
13311
13312 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13313 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13314 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13315 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13316 ID, lst));
13317 }
13318
13319 /**
13320 * xmlParseBalancedChunkMemory:
13321 * @doc: the document the chunk pertains to
13322 * @sax: the SAX handler bloc (possibly NULL)
13323 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13324 * @depth: Used for loop detection, use 0
13325 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13326 * @lst: the return value for the set of parsed nodes
13327 *
13328 * Parse a well-balanced chunk of an XML document
13329 * called by the parser
13330 * The allowed sequence for the Well Balanced Chunk is the one defined by
13331 * the content production in the XML grammar:
13332 *
13333 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13334 *
13335 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13336 * the parser error code otherwise
13337 */
13338
13339 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13340 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13341 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13342 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13343 depth, string, lst, 0 );
13344 }
13345 #endif /* LIBXML_SAX1_ENABLED */
13346
13347 /**
13348 * xmlParseBalancedChunkMemoryInternal:
13349 * @oldctxt: the existing parsing context
13350 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13351 * @user_data: the user data field for the parser context
13352 * @lst: the return value for the set of parsed nodes
13353 *
13354 *
13355 * Parse a well-balanced chunk of an XML document
13356 * called by the parser
13357 * The allowed sequence for the Well Balanced Chunk is the one defined by
13358 * the content production in the XML grammar:
13359 *
13360 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13361 *
13362 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13363 * error code otherwise
13364 *
13365 * In case recover is set to 1, the nodelist will not be empty even if
13366 * the parsed chunk is not well balanced.
13367 */
13368 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13369 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13370 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13371 xmlParserCtxtPtr ctxt;
13372 xmlDocPtr newDoc = NULL;
13373 xmlNodePtr newRoot;
13374 xmlSAXHandlerPtr oldsax = NULL;
13375 xmlNodePtr content = NULL;
13376 xmlNodePtr last = NULL;
13377 int size;
13378 xmlParserErrors ret = XML_ERR_OK;
13379 #ifdef SAX2
13380 int i;
13381 #endif
13382
13383 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13384 (oldctxt->depth > 1024)) {
13385 return(XML_ERR_ENTITY_LOOP);
13386 }
13387
13388
13389 if (lst != NULL)
13390 *lst = NULL;
13391 if (string == NULL)
13392 return(XML_ERR_INTERNAL_ERROR);
13393
13394 size = xmlStrlen(string);
13395
13396 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13397 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13398 if (user_data != NULL)
13399 ctxt->userData = user_data;
13400 else
13401 ctxt->userData = ctxt;
13402 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13403 ctxt->dict = oldctxt->dict;
13404 ctxt->input_id = oldctxt->input_id + 1;
13405 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13406 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13407 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13408
13409 #ifdef SAX2
13410 /* propagate namespaces down the entity */
13411 for (i = 0;i < oldctxt->nsNr;i += 2) {
13412 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13413 }
13414 #endif
13415
13416 oldsax = ctxt->sax;
13417 ctxt->sax = oldctxt->sax;
13418 xmlDetectSAX2(ctxt);
13419 ctxt->replaceEntities = oldctxt->replaceEntities;
13420 ctxt->options = oldctxt->options;
13421
13422 ctxt->_private = oldctxt->_private;
13423 if (oldctxt->myDoc == NULL) {
13424 newDoc = xmlNewDoc(BAD_CAST "1.0");
13425 if (newDoc == NULL) {
13426 ctxt->sax = oldsax;
13427 ctxt->dict = NULL;
13428 xmlFreeParserCtxt(ctxt);
13429 return(XML_ERR_INTERNAL_ERROR);
13430 }
13431 newDoc->properties = XML_DOC_INTERNAL;
13432 newDoc->dict = ctxt->dict;
13433 xmlDictReference(newDoc->dict);
13434 ctxt->myDoc = newDoc;
13435 } else {
13436 ctxt->myDoc = oldctxt->myDoc;
13437 content = ctxt->myDoc->children;
13438 last = ctxt->myDoc->last;
13439 }
13440 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13441 if (newRoot == NULL) {
13442 ctxt->sax = oldsax;
13443 ctxt->dict = NULL;
13444 xmlFreeParserCtxt(ctxt);
13445 if (newDoc != NULL) {
13446 xmlFreeDoc(newDoc);
13447 }
13448 return(XML_ERR_INTERNAL_ERROR);
13449 }
13450 ctxt->myDoc->children = NULL;
13451 ctxt->myDoc->last = NULL;
13452 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13453 nodePush(ctxt, ctxt->myDoc->children);
13454 ctxt->instate = XML_PARSER_CONTENT;
13455 ctxt->depth = oldctxt->depth + 1;
13456
13457 ctxt->validate = 0;
13458 ctxt->loadsubset = oldctxt->loadsubset;
13459 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13460 /*
13461 * ID/IDREF registration will be done in xmlValidateElement below
13462 */
13463 ctxt->loadsubset |= XML_SKIP_IDS;
13464 }
13465 ctxt->dictNames = oldctxt->dictNames;
13466 ctxt->attsDefault = oldctxt->attsDefault;
13467 ctxt->attsSpecial = oldctxt->attsSpecial;
13468
13469 xmlParseContent(ctxt);
13470 if ((RAW == '<') && (NXT(1) == '/')) {
13471 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472 } else if (RAW != 0) {
13473 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474 }
13475 if (ctxt->node != ctxt->myDoc->children) {
13476 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477 }
13478
13479 if (!ctxt->wellFormed) {
13480 if (ctxt->errNo == 0)
13481 ret = XML_ERR_INTERNAL_ERROR;
13482 else
13483 ret = (xmlParserErrors)ctxt->errNo;
13484 } else {
13485 ret = XML_ERR_OK;
13486 }
13487
13488 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13489 xmlNodePtr cur;
13490
13491 /*
13492 * Return the newly created nodeset after unlinking it from
13493 * they pseudo parent.
13494 */
13495 cur = ctxt->myDoc->children->children;
13496 *lst = cur;
13497 while (cur != NULL) {
13498 #ifdef LIBXML_VALID_ENABLED
13499 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13500 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13501 (cur->type == XML_ELEMENT_NODE)) {
13502 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13503 oldctxt->myDoc, cur);
13504 }
13505 #endif /* LIBXML_VALID_ENABLED */
13506 cur->parent = NULL;
13507 cur = cur->next;
13508 }
13509 ctxt->myDoc->children->children = NULL;
13510 }
13511 if (ctxt->myDoc != NULL) {
13512 xmlFreeNode(ctxt->myDoc->children);
13513 ctxt->myDoc->children = content;
13514 ctxt->myDoc->last = last;
13515 }
13516
13517 /*
13518 * Record in the parent context the number of entities replacement
13519 * done when parsing that reference.
13520 */
13521 if (oldctxt != NULL)
13522 oldctxt->nbentities += ctxt->nbentities;
13523
13524 /*
13525 * Also record the last error if any
13526 */
13527 if (ctxt->lastError.code != XML_ERR_OK)
13528 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13529
13530 ctxt->sax = oldsax;
13531 ctxt->dict = NULL;
13532 ctxt->attsDefault = NULL;
13533 ctxt->attsSpecial = NULL;
13534 xmlFreeParserCtxt(ctxt);
13535 if (newDoc != NULL) {
13536 xmlFreeDoc(newDoc);
13537 }
13538
13539 return(ret);
13540 }
13541
13542 /**
13543 * xmlParseInNodeContext:
13544 * @node: the context node
13545 * @data: the input string
13546 * @datalen: the input string length in bytes
13547 * @options: a combination of xmlParserOption
13548 * @lst: the return value for the set of parsed nodes
13549 *
13550 * Parse a well-balanced chunk of an XML document
13551 * within the context (DTD, namespaces, etc ...) of the given node.
13552 *
13553 * The allowed sequence for the data is a Well Balanced Chunk defined by
13554 * the content production in the XML grammar:
13555 *
13556 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13557 *
13558 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13559 * error code otherwise
13560 */
13561 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13562 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13563 int options, xmlNodePtr *lst) {
13564 #ifdef SAX2
13565 xmlParserCtxtPtr ctxt;
13566 xmlDocPtr doc = NULL;
13567 xmlNodePtr fake, cur;
13568 int nsnr = 0;
13569
13570 xmlParserErrors ret = XML_ERR_OK;
13571
13572 /*
13573 * check all input parameters, grab the document
13574 */
13575 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13576 return(XML_ERR_INTERNAL_ERROR);
13577 switch (node->type) {
13578 case XML_ELEMENT_NODE:
13579 case XML_ATTRIBUTE_NODE:
13580 case XML_TEXT_NODE:
13581 case XML_CDATA_SECTION_NODE:
13582 case XML_ENTITY_REF_NODE:
13583 case XML_PI_NODE:
13584 case XML_COMMENT_NODE:
13585 case XML_DOCUMENT_NODE:
13586 case XML_HTML_DOCUMENT_NODE:
13587 break;
13588 default:
13589 return(XML_ERR_INTERNAL_ERROR);
13590
13591 }
13592 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13593 (node->type != XML_DOCUMENT_NODE) &&
13594 (node->type != XML_HTML_DOCUMENT_NODE))
13595 node = node->parent;
13596 if (node == NULL)
13597 return(XML_ERR_INTERNAL_ERROR);
13598 if (node->type == XML_ELEMENT_NODE)
13599 doc = node->doc;
13600 else
13601 doc = (xmlDocPtr) node;
13602 if (doc == NULL)
13603 return(XML_ERR_INTERNAL_ERROR);
13604
13605 /*
13606 * allocate a context and set-up everything not related to the
13607 * node position in the tree
13608 */
13609 if (doc->type == XML_DOCUMENT_NODE)
13610 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13611 #ifdef LIBXML_HTML_ENABLED
13612 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13613 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13614 /*
13615 * When parsing in context, it makes no sense to add implied
13616 * elements like html/body/etc...
13617 */
13618 options |= HTML_PARSE_NOIMPLIED;
13619 }
13620 #endif
13621 else
13622 return(XML_ERR_INTERNAL_ERROR);
13623
13624 if (ctxt == NULL)
13625 return(XML_ERR_NO_MEMORY);
13626
13627 /*
13628 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13629 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13630 * we must wait until the last moment to free the original one.
13631 */
13632 if (doc->dict != NULL) {
13633 if (ctxt->dict != NULL)
13634 xmlDictFree(ctxt->dict);
13635 ctxt->dict = doc->dict;
13636 } else
13637 options |= XML_PARSE_NODICT;
13638
13639 if (doc->encoding != NULL) {
13640 xmlCharEncodingHandlerPtr hdlr;
13641
13642 if (ctxt->encoding != NULL)
13643 xmlFree((xmlChar *) ctxt->encoding);
13644 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13645
13646 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13647 if (hdlr != NULL) {
13648 xmlSwitchToEncoding(ctxt, hdlr);
13649 } else {
13650 return(XML_ERR_UNSUPPORTED_ENCODING);
13651 }
13652 }
13653
13654 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13655 xmlDetectSAX2(ctxt);
13656 ctxt->myDoc = doc;
13657 /* parsing in context, i.e. as within existing content */
13658 ctxt->input_id = 2;
13659 ctxt->instate = XML_PARSER_CONTENT;
13660
13661 fake = xmlNewComment(NULL);
13662 if (fake == NULL) {
13663 xmlFreeParserCtxt(ctxt);
13664 return(XML_ERR_NO_MEMORY);
13665 }
13666 xmlAddChild(node, fake);
13667
13668 if (node->type == XML_ELEMENT_NODE) {
13669 nodePush(ctxt, node);
13670 /*
13671 * initialize the SAX2 namespaces stack
13672 */
13673 cur = node;
13674 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13675 xmlNsPtr ns = cur->nsDef;
13676 const xmlChar *iprefix, *ihref;
13677
13678 while (ns != NULL) {
13679 if (ctxt->dict) {
13680 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13681 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13682 } else {
13683 iprefix = ns->prefix;
13684 ihref = ns->href;
13685 }
13686
13687 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13688 nsPush(ctxt, iprefix, ihref);
13689 nsnr++;
13690 }
13691 ns = ns->next;
13692 }
13693 cur = cur->parent;
13694 }
13695 }
13696
13697 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13698 /*
13699 * ID/IDREF registration will be done in xmlValidateElement below
13700 */
13701 ctxt->loadsubset |= XML_SKIP_IDS;
13702 }
13703
13704 #ifdef LIBXML_HTML_ENABLED
13705 if (doc->type == XML_HTML_DOCUMENT_NODE)
13706 __htmlParseContent(ctxt);
13707 else
13708 #endif
13709 xmlParseContent(ctxt);
13710
13711 nsPop(ctxt, nsnr);
13712 if ((RAW == '<') && (NXT(1) == '/')) {
13713 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13714 } else if (RAW != 0) {
13715 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13716 }
13717 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13718 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13719 ctxt->wellFormed = 0;
13720 }
13721
13722 if (!ctxt->wellFormed) {
13723 if (ctxt->errNo == 0)
13724 ret = XML_ERR_INTERNAL_ERROR;
13725 else
13726 ret = (xmlParserErrors)ctxt->errNo;
13727 } else {
13728 ret = XML_ERR_OK;
13729 }
13730
13731 /*
13732 * Return the newly created nodeset after unlinking it from
13733 * the pseudo sibling.
13734 */
13735
13736 cur = fake->next;
13737 fake->next = NULL;
13738 node->last = fake;
13739
13740 if (cur != NULL) {
13741 cur->prev = NULL;
13742 }
13743
13744 *lst = cur;
13745
13746 while (cur != NULL) {
13747 cur->parent = NULL;
13748 cur = cur->next;
13749 }
13750
13751 xmlUnlinkNode(fake);
13752 xmlFreeNode(fake);
13753
13754
13755 if (ret != XML_ERR_OK) {
13756 xmlFreeNodeList(*lst);
13757 *lst = NULL;
13758 }
13759
13760 if (doc->dict != NULL)
13761 ctxt->dict = NULL;
13762 xmlFreeParserCtxt(ctxt);
13763
13764 return(ret);
13765 #else /* !SAX2 */
13766 return(XML_ERR_INTERNAL_ERROR);
13767 #endif
13768 }
13769
13770 #ifdef LIBXML_SAX1_ENABLED
13771 /**
13772 * xmlParseBalancedChunkMemoryRecover:
13773 * @doc: the document the chunk pertains to
13774 * @sax: the SAX handler bloc (possibly NULL)
13775 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13776 * @depth: Used for loop detection, use 0
13777 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13778 * @lst: the return value for the set of parsed nodes
13779 * @recover: return nodes even if the data is broken (use 0)
13780 *
13781 *
13782 * Parse a well-balanced chunk of an XML document
13783 * called by the parser
13784 * The allowed sequence for the Well Balanced Chunk is the one defined by
13785 * the content production in the XML grammar:
13786 *
13787 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13788 *
13789 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13790 * the parser error code otherwise
13791 *
13792 * In case recover is set to 1, the nodelist will not be empty even if
13793 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13794 * some extent.
13795 */
13796 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13797 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13798 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13799 int recover) {
13800 xmlParserCtxtPtr ctxt;
13801 xmlDocPtr newDoc;
13802 xmlSAXHandlerPtr oldsax = NULL;
13803 xmlNodePtr content, newRoot;
13804 int size;
13805 int ret = 0;
13806
13807 if (depth > 40) {
13808 return(XML_ERR_ENTITY_LOOP);
13809 }
13810
13811
13812 if (lst != NULL)
13813 *lst = NULL;
13814 if (string == NULL)
13815 return(-1);
13816
13817 size = xmlStrlen(string);
13818
13819 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13820 if (ctxt == NULL) return(-1);
13821 ctxt->userData = ctxt;
13822 if (sax != NULL) {
13823 oldsax = ctxt->sax;
13824 ctxt->sax = sax;
13825 if (user_data != NULL)
13826 ctxt->userData = user_data;
13827 }
13828 newDoc = xmlNewDoc(BAD_CAST "1.0");
13829 if (newDoc == NULL) {
13830 xmlFreeParserCtxt(ctxt);
13831 return(-1);
13832 }
13833 newDoc->properties = XML_DOC_INTERNAL;
13834 if ((doc != NULL) && (doc->dict != NULL)) {
13835 xmlDictFree(ctxt->dict);
13836 ctxt->dict = doc->dict;
13837 xmlDictReference(ctxt->dict);
13838 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13839 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13840 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13841 ctxt->dictNames = 1;
13842 } else {
13843 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13844 }
13845 if (doc != NULL) {
13846 newDoc->intSubset = doc->intSubset;
13847 newDoc->extSubset = doc->extSubset;
13848 }
13849 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13850 if (newRoot == NULL) {
13851 if (sax != NULL)
13852 ctxt->sax = oldsax;
13853 xmlFreeParserCtxt(ctxt);
13854 newDoc->intSubset = NULL;
13855 newDoc->extSubset = NULL;
13856 xmlFreeDoc(newDoc);
13857 return(-1);
13858 }
13859 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13860 nodePush(ctxt, newRoot);
13861 if (doc == NULL) {
13862 ctxt->myDoc = newDoc;
13863 } else {
13864 ctxt->myDoc = newDoc;
13865 newDoc->children->doc = doc;
13866 /* Ensure that doc has XML spec namespace */
13867 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13868 newDoc->oldNs = doc->oldNs;
13869 }
13870 ctxt->instate = XML_PARSER_CONTENT;
13871 ctxt->input_id = 2;
13872 ctxt->depth = depth;
13873
13874 /*
13875 * Doing validity checking on chunk doesn't make sense
13876 */
13877 ctxt->validate = 0;
13878 ctxt->loadsubset = 0;
13879 xmlDetectSAX2(ctxt);
13880
13881 if ( doc != NULL ){
13882 content = doc->children;
13883 doc->children = NULL;
13884 xmlParseContent(ctxt);
13885 doc->children = content;
13886 }
13887 else {
13888 xmlParseContent(ctxt);
13889 }
13890 if ((RAW == '<') && (NXT(1) == '/')) {
13891 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13892 } else if (RAW != 0) {
13893 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13894 }
13895 if (ctxt->node != newDoc->children) {
13896 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13897 }
13898
13899 if (!ctxt->wellFormed) {
13900 if (ctxt->errNo == 0)
13901 ret = 1;
13902 else
13903 ret = ctxt->errNo;
13904 } else {
13905 ret = 0;
13906 }
13907
13908 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13909 xmlNodePtr cur;
13910
13911 /*
13912 * Return the newly created nodeset after unlinking it from
13913 * they pseudo parent.
13914 */
13915 cur = newDoc->children->children;
13916 *lst = cur;
13917 while (cur != NULL) {
13918 xmlSetTreeDoc(cur, doc);
13919 cur->parent = NULL;
13920 cur = cur->next;
13921 }
13922 newDoc->children->children = NULL;
13923 }
13924
13925 if (sax != NULL)
13926 ctxt->sax = oldsax;
13927 xmlFreeParserCtxt(ctxt);
13928 newDoc->intSubset = NULL;
13929 newDoc->extSubset = NULL;
13930 newDoc->oldNs = NULL;
13931 xmlFreeDoc(newDoc);
13932
13933 return(ret);
13934 }
13935
13936 /**
13937 * xmlSAXParseEntity:
13938 * @sax: the SAX handler block
13939 * @filename: the filename
13940 *
13941 * parse an XML external entity out of context and build a tree.
13942 * It use the given SAX function block to handle the parsing callback.
13943 * If sax is NULL, fallback to the default DOM tree building routines.
13944 *
13945 * [78] extParsedEnt ::= TextDecl? content
13946 *
13947 * This correspond to a "Well Balanced" chunk
13948 *
13949 * Returns the resulting document tree
13950 */
13951
13952 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13953 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13954 xmlDocPtr ret;
13955 xmlParserCtxtPtr ctxt;
13956
13957 ctxt = xmlCreateFileParserCtxt(filename);
13958 if (ctxt == NULL) {
13959 return(NULL);
13960 }
13961 if (sax != NULL) {
13962 if (ctxt->sax != NULL)
13963 xmlFree(ctxt->sax);
13964 ctxt->sax = sax;
13965 ctxt->userData = NULL;
13966 }
13967
13968 xmlParseExtParsedEnt(ctxt);
13969
13970 if (ctxt->wellFormed)
13971 ret = ctxt->myDoc;
13972 else {
13973 ret = NULL;
13974 xmlFreeDoc(ctxt->myDoc);
13975 ctxt->myDoc = NULL;
13976 }
13977 if (sax != NULL)
13978 ctxt->sax = NULL;
13979 xmlFreeParserCtxt(ctxt);
13980
13981 return(ret);
13982 }
13983
13984 /**
13985 * xmlParseEntity:
13986 * @filename: the filename
13987 *
13988 * parse an XML external entity out of context and build a tree.
13989 *
13990 * [78] extParsedEnt ::= TextDecl? content
13991 *
13992 * This correspond to a "Well Balanced" chunk
13993 *
13994 * Returns the resulting document tree
13995 */
13996
13997 xmlDocPtr
xmlParseEntity(const char * filename)13998 xmlParseEntity(const char *filename) {
13999 return(xmlSAXParseEntity(NULL, filename));
14000 }
14001 #endif /* LIBXML_SAX1_ENABLED */
14002
14003 /**
14004 * xmlCreateEntityParserCtxtInternal:
14005 * @URL: the entity URL
14006 * @ID: the entity PUBLIC ID
14007 * @base: a possible base for the target URI
14008 * @pctx: parser context used to set options on new context
14009 *
14010 * Create a parser context for an external entity
14011 * Automatic support for ZLIB/Compress compressed document is provided
14012 * by default if found at compile-time.
14013 *
14014 * Returns the new parser context or NULL
14015 */
14016 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)14017 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14018 const xmlChar *base, xmlParserCtxtPtr pctx) {
14019 xmlParserCtxtPtr ctxt;
14020 xmlParserInputPtr inputStream;
14021 char *directory = NULL;
14022 xmlChar *uri;
14023
14024 ctxt = xmlNewParserCtxt();
14025 if (ctxt == NULL) {
14026 return(NULL);
14027 }
14028
14029 if (pctx != NULL) {
14030 ctxt->options = pctx->options;
14031 ctxt->_private = pctx->_private;
14032 /*
14033 * this is a subparser of pctx, so the input_id should be
14034 * incremented to distinguish from main entity
14035 */
14036 ctxt->input_id = pctx->input_id + 1;
14037 }
14038
14039 uri = xmlBuildURI(URL, base);
14040
14041 if (uri == NULL) {
14042 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14043 if (inputStream == NULL) {
14044 xmlFreeParserCtxt(ctxt);
14045 return(NULL);
14046 }
14047
14048 inputPush(ctxt, inputStream);
14049
14050 if ((ctxt->directory == NULL) && (directory == NULL))
14051 directory = xmlParserGetDirectory((char *)URL);
14052 if ((ctxt->directory == NULL) && (directory != NULL))
14053 ctxt->directory = directory;
14054 } else {
14055 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14056 if (inputStream == NULL) {
14057 xmlFree(uri);
14058 xmlFreeParserCtxt(ctxt);
14059 return(NULL);
14060 }
14061
14062 inputPush(ctxt, inputStream);
14063
14064 if ((ctxt->directory == NULL) && (directory == NULL))
14065 directory = xmlParserGetDirectory((char *)uri);
14066 if ((ctxt->directory == NULL) && (directory != NULL))
14067 ctxt->directory = directory;
14068 xmlFree(uri);
14069 }
14070 return(ctxt);
14071 }
14072
14073 /**
14074 * xmlCreateEntityParserCtxt:
14075 * @URL: the entity URL
14076 * @ID: the entity PUBLIC ID
14077 * @base: a possible base for the target URI
14078 *
14079 * Create a parser context for an external entity
14080 * Automatic support for ZLIB/Compress compressed document is provided
14081 * by default if found at compile-time.
14082 *
14083 * Returns the new parser context or NULL
14084 */
14085 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14086 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14087 const xmlChar *base) {
14088 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14089
14090 }
14091
14092 /************************************************************************
14093 * *
14094 * Front ends when parsing from a file *
14095 * *
14096 ************************************************************************/
14097
14098 /**
14099 * xmlCreateURLParserCtxt:
14100 * @filename: the filename or URL
14101 * @options: a combination of xmlParserOption
14102 *
14103 * Create a parser context for a file or URL content.
14104 * Automatic support for ZLIB/Compress compressed document is provided
14105 * by default if found at compile-time and for file accesses
14106 *
14107 * Returns the new parser context or NULL
14108 */
14109 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14110 xmlCreateURLParserCtxt(const char *filename, int options)
14111 {
14112 xmlParserCtxtPtr ctxt;
14113 xmlParserInputPtr inputStream;
14114 char *directory = NULL;
14115
14116 ctxt = xmlNewParserCtxt();
14117 if (ctxt == NULL) {
14118 xmlErrMemory(NULL, "cannot allocate parser context");
14119 return(NULL);
14120 }
14121
14122 if (options)
14123 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14124 ctxt->linenumbers = 1;
14125
14126 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14127 if (inputStream == NULL) {
14128 xmlFreeParserCtxt(ctxt);
14129 return(NULL);
14130 }
14131
14132 inputPush(ctxt, inputStream);
14133 if ((ctxt->directory == NULL) && (directory == NULL))
14134 directory = xmlParserGetDirectory(filename);
14135 if ((ctxt->directory == NULL) && (directory != NULL))
14136 ctxt->directory = directory;
14137
14138 return(ctxt);
14139 }
14140
14141 /**
14142 * xmlCreateFileParserCtxt:
14143 * @filename: the filename
14144 *
14145 * Create a parser context for a file content.
14146 * Automatic support for ZLIB/Compress compressed document is provided
14147 * by default if found at compile-time.
14148 *
14149 * Returns the new parser context or NULL
14150 */
14151 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14152 xmlCreateFileParserCtxt(const char *filename)
14153 {
14154 return(xmlCreateURLParserCtxt(filename, 0));
14155 }
14156
14157 #ifdef LIBXML_SAX1_ENABLED
14158 /**
14159 * xmlSAXParseFileWithData:
14160 * @sax: the SAX handler block
14161 * @filename: the filename
14162 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14163 * documents
14164 * @data: the userdata
14165 *
14166 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167 * compressed document is provided by default if found at compile-time.
14168 * It use the given SAX function block to handle the parsing callback.
14169 * If sax is NULL, fallback to the default DOM tree building routines.
14170 *
14171 * User data (void *) is stored within the parser context in the
14172 * context's _private member, so it is available nearly everywhere in libxml
14173 *
14174 * Returns the resulting document tree
14175 */
14176
14177 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14178 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14179 int recovery, void *data) {
14180 xmlDocPtr ret;
14181 xmlParserCtxtPtr ctxt;
14182
14183 xmlInitParser();
14184
14185 ctxt = xmlCreateFileParserCtxt(filename);
14186 if (ctxt == NULL) {
14187 return(NULL);
14188 }
14189 if (sax != NULL) {
14190 if (ctxt->sax != NULL)
14191 xmlFree(ctxt->sax);
14192 ctxt->sax = sax;
14193 }
14194 xmlDetectSAX2(ctxt);
14195 if (data!=NULL) {
14196 ctxt->_private = data;
14197 }
14198
14199 if (ctxt->directory == NULL)
14200 ctxt->directory = xmlParserGetDirectory(filename);
14201
14202 ctxt->recovery = recovery;
14203
14204 xmlParseDocument(ctxt);
14205
14206 if ((ctxt->wellFormed) || recovery) {
14207 ret = ctxt->myDoc;
14208 if (ret != NULL) {
14209 if (ctxt->input->buf->compressed > 0)
14210 ret->compression = 9;
14211 else
14212 ret->compression = ctxt->input->buf->compressed;
14213 }
14214 }
14215 else {
14216 ret = NULL;
14217 xmlFreeDoc(ctxt->myDoc);
14218 ctxt->myDoc = NULL;
14219 }
14220 if (sax != NULL)
14221 ctxt->sax = NULL;
14222 xmlFreeParserCtxt(ctxt);
14223
14224 return(ret);
14225 }
14226
14227 /**
14228 * xmlSAXParseFile:
14229 * @sax: the SAX handler block
14230 * @filename: the filename
14231 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14232 * documents
14233 *
14234 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14235 * compressed document is provided by default if found at compile-time.
14236 * It use the given SAX function block to handle the parsing callback.
14237 * If sax is NULL, fallback to the default DOM tree building routines.
14238 *
14239 * Returns the resulting document tree
14240 */
14241
14242 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14243 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14244 int recovery) {
14245 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14246 }
14247
14248 /**
14249 * xmlRecoverDoc:
14250 * @cur: a pointer to an array of xmlChar
14251 *
14252 * parse an XML in-memory document and build a tree.
14253 * In the case the document is not Well Formed, a attempt to build a
14254 * tree is tried anyway
14255 *
14256 * Returns the resulting document tree or NULL in case of failure
14257 */
14258
14259 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14260 xmlRecoverDoc(const xmlChar *cur) {
14261 return(xmlSAXParseDoc(NULL, cur, 1));
14262 }
14263
14264 /**
14265 * xmlParseFile:
14266 * @filename: the filename
14267 *
14268 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14269 * compressed document is provided by default if found at compile-time.
14270 *
14271 * Returns the resulting document tree if the file was wellformed,
14272 * NULL otherwise.
14273 */
14274
14275 xmlDocPtr
xmlParseFile(const char * filename)14276 xmlParseFile(const char *filename) {
14277 return(xmlSAXParseFile(NULL, filename, 0));
14278 }
14279
14280 /**
14281 * xmlRecoverFile:
14282 * @filename: the filename
14283 *
14284 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14285 * compressed document is provided by default if found at compile-time.
14286 * In the case the document is not Well Formed, it attempts to build
14287 * a tree anyway
14288 *
14289 * Returns the resulting document tree or NULL in case of failure
14290 */
14291
14292 xmlDocPtr
xmlRecoverFile(const char * filename)14293 xmlRecoverFile(const char *filename) {
14294 return(xmlSAXParseFile(NULL, filename, 1));
14295 }
14296
14297
14298 /**
14299 * xmlSetupParserForBuffer:
14300 * @ctxt: an XML parser context
14301 * @buffer: a xmlChar * buffer
14302 * @filename: a file name
14303 *
14304 * Setup the parser context to parse a new buffer; Clears any prior
14305 * contents from the parser context. The buffer parameter must not be
14306 * NULL, but the filename parameter can be
14307 */
14308 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14309 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14310 const char* filename)
14311 {
14312 xmlParserInputPtr input;
14313
14314 if ((ctxt == NULL) || (buffer == NULL))
14315 return;
14316
14317 input = xmlNewInputStream(ctxt);
14318 if (input == NULL) {
14319 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14320 xmlClearParserCtxt(ctxt);
14321 return;
14322 }
14323
14324 xmlClearParserCtxt(ctxt);
14325 if (filename != NULL)
14326 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14327 input->base = buffer;
14328 input->cur = buffer;
14329 input->end = &buffer[xmlStrlen(buffer)];
14330 inputPush(ctxt, input);
14331 }
14332
14333 /**
14334 * xmlSAXUserParseFile:
14335 * @sax: a SAX handler
14336 * @user_data: The user data returned on SAX callbacks
14337 * @filename: a file name
14338 *
14339 * parse an XML file and call the given SAX handler routines.
14340 * Automatic support for ZLIB/Compress compressed document is provided
14341 *
14342 * Returns 0 in case of success or a error number otherwise
14343 */
14344 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14345 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14346 const char *filename) {
14347 int ret = 0;
14348 xmlParserCtxtPtr ctxt;
14349
14350 ctxt = xmlCreateFileParserCtxt(filename);
14351 if (ctxt == NULL) return -1;
14352 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14353 xmlFree(ctxt->sax);
14354 ctxt->sax = sax;
14355 xmlDetectSAX2(ctxt);
14356
14357 if (user_data != NULL)
14358 ctxt->userData = user_data;
14359
14360 xmlParseDocument(ctxt);
14361
14362 if (ctxt->wellFormed)
14363 ret = 0;
14364 else {
14365 if (ctxt->errNo != 0)
14366 ret = ctxt->errNo;
14367 else
14368 ret = -1;
14369 }
14370 if (sax != NULL)
14371 ctxt->sax = NULL;
14372 if (ctxt->myDoc != NULL) {
14373 xmlFreeDoc(ctxt->myDoc);
14374 ctxt->myDoc = NULL;
14375 }
14376 xmlFreeParserCtxt(ctxt);
14377
14378 return ret;
14379 }
14380 #endif /* LIBXML_SAX1_ENABLED */
14381
14382 /************************************************************************
14383 * *
14384 * Front ends when parsing from memory *
14385 * *
14386 ************************************************************************/
14387
14388 /**
14389 * xmlCreateMemoryParserCtxt:
14390 * @buffer: a pointer to a char array
14391 * @size: the size of the array
14392 *
14393 * Create a parser context for an XML in-memory document.
14394 *
14395 * Returns the new parser context or NULL
14396 */
14397 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14398 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14399 xmlParserCtxtPtr ctxt;
14400 xmlParserInputPtr input;
14401 xmlParserInputBufferPtr buf;
14402
14403 if (buffer == NULL)
14404 return(NULL);
14405 if (size <= 0)
14406 return(NULL);
14407
14408 ctxt = xmlNewParserCtxt();
14409 if (ctxt == NULL)
14410 return(NULL);
14411
14412 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14413 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14414 if (buf == NULL) {
14415 xmlFreeParserCtxt(ctxt);
14416 return(NULL);
14417 }
14418
14419 input = xmlNewInputStream(ctxt);
14420 if (input == NULL) {
14421 xmlFreeParserInputBuffer(buf);
14422 xmlFreeParserCtxt(ctxt);
14423 return(NULL);
14424 }
14425
14426 input->filename = NULL;
14427 input->buf = buf;
14428 xmlBufResetInput(input->buf->buffer, input);
14429
14430 inputPush(ctxt, input);
14431 return(ctxt);
14432 }
14433
14434 #ifdef LIBXML_SAX1_ENABLED
14435 /**
14436 * xmlSAXParseMemoryWithData:
14437 * @sax: the SAX handler block
14438 * @buffer: an pointer to a char array
14439 * @size: the size of the array
14440 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14441 * documents
14442 * @data: the userdata
14443 *
14444 * parse an XML in-memory block and use the given SAX function block
14445 * to handle the parsing callback. If sax is NULL, fallback to the default
14446 * DOM tree building routines.
14447 *
14448 * User data (void *) is stored within the parser context in the
14449 * context's _private member, so it is available nearly everywhere in libxml
14450 *
14451 * Returns the resulting document tree
14452 */
14453
14454 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14455 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14456 int size, int recovery, void *data) {
14457 xmlDocPtr ret;
14458 xmlParserCtxtPtr ctxt;
14459
14460 xmlInitParser();
14461
14462 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14463 if (ctxt == NULL) return(NULL);
14464 if (sax != NULL) {
14465 if (ctxt->sax != NULL)
14466 xmlFree(ctxt->sax);
14467 ctxt->sax = sax;
14468 }
14469 xmlDetectSAX2(ctxt);
14470 if (data!=NULL) {
14471 ctxt->_private=data;
14472 }
14473
14474 ctxt->recovery = recovery;
14475
14476 xmlParseDocument(ctxt);
14477
14478 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14479 else {
14480 ret = NULL;
14481 xmlFreeDoc(ctxt->myDoc);
14482 ctxt->myDoc = NULL;
14483 }
14484 if (sax != NULL)
14485 ctxt->sax = NULL;
14486 xmlFreeParserCtxt(ctxt);
14487
14488 return(ret);
14489 }
14490
14491 /**
14492 * xmlSAXParseMemory:
14493 * @sax: the SAX handler block
14494 * @buffer: an pointer to a char array
14495 * @size: the size of the array
14496 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14497 * documents
14498 *
14499 * parse an XML in-memory block and use the given SAX function block
14500 * to handle the parsing callback. If sax is NULL, fallback to the default
14501 * DOM tree building routines.
14502 *
14503 * Returns the resulting document tree
14504 */
14505 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14506 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14507 int size, int recovery) {
14508 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14509 }
14510
14511 /**
14512 * xmlParseMemory:
14513 * @buffer: an pointer to a char array
14514 * @size: the size of the array
14515 *
14516 * parse an XML in-memory block and build a tree.
14517 *
14518 * Returns the resulting document tree
14519 */
14520
xmlParseMemory(const char * buffer,int size)14521 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14522 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14523 }
14524
14525 /**
14526 * xmlRecoverMemory:
14527 * @buffer: an pointer to a char array
14528 * @size: the size of the array
14529 *
14530 * parse an XML in-memory block and build a tree.
14531 * In the case the document is not Well Formed, an attempt to
14532 * build a tree is tried anyway
14533 *
14534 * Returns the resulting document tree or NULL in case of error
14535 */
14536
xmlRecoverMemory(const char * buffer,int size)14537 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14538 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14539 }
14540
14541 /**
14542 * xmlSAXUserParseMemory:
14543 * @sax: a SAX handler
14544 * @user_data: The user data returned on SAX callbacks
14545 * @buffer: an in-memory XML document input
14546 * @size: the length of the XML document in bytes
14547 *
14548 * A better SAX parsing routine.
14549 * parse an XML in-memory buffer and call the given SAX handler routines.
14550 *
14551 * Returns 0 in case of success or a error number otherwise
14552 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14553 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14554 const char *buffer, int size) {
14555 int ret = 0;
14556 xmlParserCtxtPtr ctxt;
14557
14558 xmlInitParser();
14559
14560 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14561 if (ctxt == NULL) return -1;
14562 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14563 xmlFree(ctxt->sax);
14564 ctxt->sax = sax;
14565 xmlDetectSAX2(ctxt);
14566
14567 if (user_data != NULL)
14568 ctxt->userData = user_data;
14569
14570 xmlParseDocument(ctxt);
14571
14572 if (ctxt->wellFormed)
14573 ret = 0;
14574 else {
14575 if (ctxt->errNo != 0)
14576 ret = ctxt->errNo;
14577 else
14578 ret = -1;
14579 }
14580 if (sax != NULL)
14581 ctxt->sax = NULL;
14582 if (ctxt->myDoc != NULL) {
14583 xmlFreeDoc(ctxt->myDoc);
14584 ctxt->myDoc = NULL;
14585 }
14586 xmlFreeParserCtxt(ctxt);
14587
14588 return ret;
14589 }
14590 #endif /* LIBXML_SAX1_ENABLED */
14591
14592 /**
14593 * xmlCreateDocParserCtxt:
14594 * @cur: a pointer to an array of xmlChar
14595 *
14596 * Creates a parser context for an XML in-memory document.
14597 *
14598 * Returns the new parser context or NULL
14599 */
14600 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14601 xmlCreateDocParserCtxt(const xmlChar *cur) {
14602 int len;
14603
14604 if (cur == NULL)
14605 return(NULL);
14606 len = xmlStrlen(cur);
14607 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14608 }
14609
14610 #ifdef LIBXML_SAX1_ENABLED
14611 /**
14612 * xmlSAXParseDoc:
14613 * @sax: the SAX handler block
14614 * @cur: a pointer to an array of xmlChar
14615 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14616 * documents
14617 *
14618 * parse an XML in-memory document and build a tree.
14619 * It use the given SAX function block to handle the parsing callback.
14620 * If sax is NULL, fallback to the default DOM tree building routines.
14621 *
14622 * Returns the resulting document tree
14623 */
14624
14625 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14626 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14627 xmlDocPtr ret;
14628 xmlParserCtxtPtr ctxt;
14629 xmlSAXHandlerPtr oldsax = NULL;
14630
14631 if (cur == NULL) return(NULL);
14632
14633
14634 ctxt = xmlCreateDocParserCtxt(cur);
14635 if (ctxt == NULL) return(NULL);
14636 if (sax != NULL) {
14637 oldsax = ctxt->sax;
14638 ctxt->sax = sax;
14639 ctxt->userData = NULL;
14640 }
14641 xmlDetectSAX2(ctxt);
14642
14643 xmlParseDocument(ctxt);
14644 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14645 else {
14646 ret = NULL;
14647 xmlFreeDoc(ctxt->myDoc);
14648 ctxt->myDoc = NULL;
14649 }
14650 if (sax != NULL)
14651 ctxt->sax = oldsax;
14652 xmlFreeParserCtxt(ctxt);
14653
14654 return(ret);
14655 }
14656
14657 /**
14658 * xmlParseDoc:
14659 * @cur: a pointer to an array of xmlChar
14660 *
14661 * parse an XML in-memory document and build a tree.
14662 *
14663 * Returns the resulting document tree
14664 */
14665
14666 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14667 xmlParseDoc(const xmlChar *cur) {
14668 return(xmlSAXParseDoc(NULL, cur, 0));
14669 }
14670 #endif /* LIBXML_SAX1_ENABLED */
14671
14672 #ifdef LIBXML_LEGACY_ENABLED
14673 /************************************************************************
14674 * *
14675 * Specific function to keep track of entities references *
14676 * and used by the XSLT debugger *
14677 * *
14678 ************************************************************************/
14679
14680 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14681
14682 /**
14683 * xmlAddEntityReference:
14684 * @ent : A valid entity
14685 * @firstNode : A valid first node for children of entity
14686 * @lastNode : A valid last node of children entity
14687 *
14688 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14689 */
14690 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14691 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14692 xmlNodePtr lastNode)
14693 {
14694 if (xmlEntityRefFunc != NULL) {
14695 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14696 }
14697 }
14698
14699
14700 /**
14701 * xmlSetEntityReferenceFunc:
14702 * @func: A valid function
14703 *
14704 * Set the function to call call back when a xml reference has been made
14705 */
14706 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14707 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14708 {
14709 xmlEntityRefFunc = func;
14710 }
14711 #endif /* LIBXML_LEGACY_ENABLED */
14712
14713 /************************************************************************
14714 * *
14715 * Miscellaneous *
14716 * *
14717 ************************************************************************/
14718
14719 #ifdef LIBXML_XPATH_ENABLED
14720 #include <libxml/xpath.h>
14721 #endif
14722
14723 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14724 static int xmlParserInitialized = 0;
14725
14726 /**
14727 * xmlInitParser:
14728 *
14729 * Initialization function for the XML parser.
14730 * This is not reentrant. Call once before processing in case of
14731 * use in multithreaded programs.
14732 */
14733
14734 void
xmlInitParser(void)14735 xmlInitParser(void) {
14736 if (xmlParserInitialized != 0)
14737 return;
14738
14739 #ifdef LIBXML_THREAD_ENABLED
14740 __xmlGlobalInitMutexLock();
14741 if (xmlParserInitialized == 0) {
14742 #endif
14743 xmlInitThreads();
14744 xmlInitGlobals();
14745 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14746 (xmlGenericError == NULL))
14747 initGenericErrorDefaultFunc(NULL);
14748 xmlInitMemory();
14749 xmlInitializeDict();
14750 xmlInitCharEncodingHandlers();
14751 xmlDefaultSAXHandlerInit();
14752 xmlRegisterDefaultInputCallbacks();
14753 #ifdef LIBXML_OUTPUT_ENABLED
14754 xmlRegisterDefaultOutputCallbacks();
14755 #endif /* LIBXML_OUTPUT_ENABLED */
14756 #ifdef LIBXML_HTML_ENABLED
14757 htmlInitAutoClose();
14758 htmlDefaultSAXHandlerInit();
14759 #endif
14760 #ifdef LIBXML_XPATH_ENABLED
14761 xmlXPathInit();
14762 #endif
14763 xmlParserInitialized = 1;
14764 #ifdef LIBXML_THREAD_ENABLED
14765 }
14766 __xmlGlobalInitMutexUnlock();
14767 #endif
14768 }
14769
14770 /**
14771 * xmlCleanupParser:
14772 *
14773 * This function name is somewhat misleading. It does not clean up
14774 * parser state, it cleans up memory allocated by the library itself.
14775 * It is a cleanup function for the XML library. It tries to reclaim all
14776 * related global memory allocated for the library processing.
14777 * It doesn't deallocate any document related memory. One should
14778 * call xmlCleanupParser() only when the process has finished using
14779 * the library and all XML/HTML documents built with it.
14780 * See also xmlInitParser() which has the opposite function of preparing
14781 * the library for operations.
14782 *
14783 * WARNING: if your application is multithreaded or has plugin support
14784 * calling this may crash the application if another thread or
14785 * a plugin is still using libxml2. It's sometimes very hard to
14786 * guess if libxml2 is in use in the application, some libraries
14787 * or plugins may use it without notice. In case of doubt abstain
14788 * from calling this function or do it just before calling exit()
14789 * to avoid leak reports from valgrind !
14790 */
14791
14792 void
xmlCleanupParser(void)14793 xmlCleanupParser(void) {
14794 if (!xmlParserInitialized)
14795 return;
14796
14797 xmlCleanupCharEncodingHandlers();
14798 #ifdef LIBXML_CATALOG_ENABLED
14799 xmlCatalogCleanup();
14800 #endif
14801 xmlDictCleanup();
14802 xmlCleanupInputCallbacks();
14803 #ifdef LIBXML_OUTPUT_ENABLED
14804 xmlCleanupOutputCallbacks();
14805 #endif
14806 #ifdef LIBXML_SCHEMAS_ENABLED
14807 xmlSchemaCleanupTypes();
14808 xmlRelaxNGCleanupTypes();
14809 #endif
14810 xmlResetLastError();
14811 xmlCleanupGlobals();
14812 xmlCleanupThreads(); /* must be last if called not from the main thread */
14813 xmlCleanupMemory();
14814 xmlParserInitialized = 0;
14815 }
14816
14817 /************************************************************************
14818 * *
14819 * New set (2.6.0) of simpler and more flexible APIs *
14820 * *
14821 ************************************************************************/
14822
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.  A variable named "dict" must be visible where the
 * macro is used; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside an unbraced if/else) and must be
 * followed by a semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14834
14835 /**
14836 * xmlCtxtReset:
14837 * @ctxt: an XML parser context
14838 *
14839 * Reset a parser context
14840 */
14841 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14842 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14843 {
14844 xmlParserInputPtr input;
14845 xmlDictPtr dict;
14846
14847 if (ctxt == NULL)
14848 return;
14849
14850 dict = ctxt->dict;
14851
14852 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14853 xmlFreeInputStream(input);
14854 }
14855 ctxt->inputNr = 0;
14856 ctxt->input = NULL;
14857
14858 ctxt->spaceNr = 0;
14859 if (ctxt->spaceTab != NULL) {
14860 ctxt->spaceTab[0] = -1;
14861 ctxt->space = &ctxt->spaceTab[0];
14862 } else {
14863 ctxt->space = NULL;
14864 }
14865
14866
14867 ctxt->nodeNr = 0;
14868 ctxt->node = NULL;
14869
14870 ctxt->nameNr = 0;
14871 ctxt->name = NULL;
14872
14873 DICT_FREE(ctxt->version);
14874 ctxt->version = NULL;
14875 DICT_FREE(ctxt->encoding);
14876 ctxt->encoding = NULL;
14877 DICT_FREE(ctxt->directory);
14878 ctxt->directory = NULL;
14879 DICT_FREE(ctxt->extSubURI);
14880 ctxt->extSubURI = NULL;
14881 DICT_FREE(ctxt->extSubSystem);
14882 ctxt->extSubSystem = NULL;
14883 if (ctxt->myDoc != NULL)
14884 xmlFreeDoc(ctxt->myDoc);
14885 ctxt->myDoc = NULL;
14886
14887 ctxt->standalone = -1;
14888 ctxt->hasExternalSubset = 0;
14889 ctxt->hasPErefs = 0;
14890 ctxt->html = 0;
14891 ctxt->external = 0;
14892 ctxt->instate = XML_PARSER_START;
14893 ctxt->token = 0;
14894
14895 ctxt->wellFormed = 1;
14896 ctxt->nsWellFormed = 1;
14897 ctxt->disableSAX = 0;
14898 ctxt->valid = 1;
14899 #if 0
14900 ctxt->vctxt.userData = ctxt;
14901 ctxt->vctxt.error = xmlParserValidityError;
14902 ctxt->vctxt.warning = xmlParserValidityWarning;
14903 #endif
14904 ctxt->record_info = 0;
14905 ctxt->nbChars = 0;
14906 ctxt->checkIndex = 0;
14907 ctxt->inSubset = 0;
14908 ctxt->errNo = XML_ERR_OK;
14909 ctxt->depth = 0;
14910 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14911 ctxt->catalogs = NULL;
14912 ctxt->nbentities = 0;
14913 ctxt->sizeentities = 0;
14914 ctxt->sizeentcopy = 0;
14915 xmlInitNodeInfoSeq(&ctxt->node_seq);
14916
14917 if (ctxt->attsDefault != NULL) {
14918 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14919 ctxt->attsDefault = NULL;
14920 }
14921 if (ctxt->attsSpecial != NULL) {
14922 xmlHashFree(ctxt->attsSpecial, NULL);
14923 ctxt->attsSpecial = NULL;
14924 }
14925
14926 #ifdef LIBXML_CATALOG_ENABLED
14927 if (ctxt->catalogs != NULL)
14928 xmlCatalogFreeLocal(ctxt->catalogs);
14929 #endif
14930 if (ctxt->lastError.code != XML_ERR_OK)
14931 xmlResetError(&ctxt->lastError);
14932 }
14933
14934 /**
14935 * xmlCtxtResetPush:
14936 * @ctxt: an XML parser context
14937 * @chunk: a pointer to an array of chars
14938 * @size: number of chars in the array
14939 * @filename: an optional file name or URI
14940 * @encoding: the document encoding, or NULL
14941 *
14942 * Reset a push parser context
14943 *
14944 * Returns 0 in case of success and 1 in case of error
14945 */
14946 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14947 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14948 int size, const char *filename, const char *encoding)
14949 {
14950 xmlParserInputPtr inputStream;
14951 xmlParserInputBufferPtr buf;
14952 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14953
14954 if (ctxt == NULL)
14955 return(1);
14956
14957 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14958 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14959
14960 buf = xmlAllocParserInputBuffer(enc);
14961 if (buf == NULL)
14962 return(1);
14963
14964 if (ctxt == NULL) {
14965 xmlFreeParserInputBuffer(buf);
14966 return(1);
14967 }
14968
14969 xmlCtxtReset(ctxt);
14970
14971 if (ctxt->pushTab == NULL) {
14972 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14973 sizeof(xmlChar *));
14974 if (ctxt->pushTab == NULL) {
14975 xmlErrMemory(ctxt, NULL);
14976 xmlFreeParserInputBuffer(buf);
14977 return(1);
14978 }
14979 }
14980
14981 if (filename == NULL) {
14982 ctxt->directory = NULL;
14983 } else {
14984 ctxt->directory = xmlParserGetDirectory(filename);
14985 }
14986
14987 inputStream = xmlNewInputStream(ctxt);
14988 if (inputStream == NULL) {
14989 xmlFreeParserInputBuffer(buf);
14990 return(1);
14991 }
14992
14993 if (filename == NULL)
14994 inputStream->filename = NULL;
14995 else
14996 inputStream->filename = (char *)
14997 xmlCanonicPath((const xmlChar *) filename);
14998 inputStream->buf = buf;
14999 xmlBufResetInput(buf->buffer, inputStream);
15000
15001 inputPush(ctxt, inputStream);
15002
15003 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15004 (ctxt->input->buf != NULL)) {
15005 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15006 size_t cur = ctxt->input->cur - ctxt->input->base;
15007
15008 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15009
15010 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15011 #ifdef DEBUG_PUSH
15012 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15013 #endif
15014 }
15015
15016 if (encoding != NULL) {
15017 xmlCharEncodingHandlerPtr hdlr;
15018
15019 if (ctxt->encoding != NULL)
15020 xmlFree((xmlChar *) ctxt->encoding);
15021 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15022
15023 hdlr = xmlFindCharEncodingHandler(encoding);
15024 if (hdlr != NULL) {
15025 xmlSwitchToEncoding(ctxt, hdlr);
15026 } else {
15027 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15028 "Unsupported encoding %s\n", BAD_CAST encoding);
15029 }
15030 } else if (enc != XML_CHAR_ENCODING_NONE) {
15031 xmlSwitchEncoding(ctxt, enc);
15032 }
15033
15034 return(0);
15035 }
15036
15037
15038 /**
15039 * xmlCtxtUseOptionsInternal:
15040 * @ctxt: an XML parser context
15041 * @options: a combination of xmlParserOption
15042 * @encoding: the user provided encoding to use
15043 *
15044 * Applies the options to the parser context
15045 *
15046 * Returns 0 in case of success, the set of unknown or unimplemented options
15047 * in case of error.
15048 */
15049 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15050 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15051 {
15052 if (ctxt == NULL)
15053 return(-1);
15054 if (encoding != NULL) {
15055 if (ctxt->encoding != NULL)
15056 xmlFree((xmlChar *) ctxt->encoding);
15057 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15058 }
15059 if (options & XML_PARSE_RECOVER) {
15060 ctxt->recovery = 1;
15061 options -= XML_PARSE_RECOVER;
15062 ctxt->options |= XML_PARSE_RECOVER;
15063 } else
15064 ctxt->recovery = 0;
15065 if (options & XML_PARSE_DTDLOAD) {
15066 ctxt->loadsubset = XML_DETECT_IDS;
15067 options -= XML_PARSE_DTDLOAD;
15068 ctxt->options |= XML_PARSE_DTDLOAD;
15069 } else
15070 ctxt->loadsubset = 0;
15071 if (options & XML_PARSE_DTDATTR) {
15072 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15073 options -= XML_PARSE_DTDATTR;
15074 ctxt->options |= XML_PARSE_DTDATTR;
15075 }
15076 if (options & XML_PARSE_NOENT) {
15077 ctxt->replaceEntities = 1;
15078 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15079 options -= XML_PARSE_NOENT;
15080 ctxt->options |= XML_PARSE_NOENT;
15081 } else
15082 ctxt->replaceEntities = 0;
15083 if (options & XML_PARSE_PEDANTIC) {
15084 ctxt->pedantic = 1;
15085 options -= XML_PARSE_PEDANTIC;
15086 ctxt->options |= XML_PARSE_PEDANTIC;
15087 } else
15088 ctxt->pedantic = 0;
15089 if (options & XML_PARSE_NOBLANKS) {
15090 ctxt->keepBlanks = 0;
15091 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15092 options -= XML_PARSE_NOBLANKS;
15093 ctxt->options |= XML_PARSE_NOBLANKS;
15094 } else
15095 ctxt->keepBlanks = 1;
15096 if (options & XML_PARSE_DTDVALID) {
15097 ctxt->validate = 1;
15098 if (options & XML_PARSE_NOWARNING)
15099 ctxt->vctxt.warning = NULL;
15100 if (options & XML_PARSE_NOERROR)
15101 ctxt->vctxt.error = NULL;
15102 options -= XML_PARSE_DTDVALID;
15103 ctxt->options |= XML_PARSE_DTDVALID;
15104 } else
15105 ctxt->validate = 0;
15106 if (options & XML_PARSE_NOWARNING) {
15107 ctxt->sax->warning = NULL;
15108 options -= XML_PARSE_NOWARNING;
15109 }
15110 if (options & XML_PARSE_NOERROR) {
15111 ctxt->sax->error = NULL;
15112 ctxt->sax->fatalError = NULL;
15113 options -= XML_PARSE_NOERROR;
15114 }
15115 #ifdef LIBXML_SAX1_ENABLED
15116 if (options & XML_PARSE_SAX1) {
15117 ctxt->sax->startElement = xmlSAX2StartElement;
15118 ctxt->sax->endElement = xmlSAX2EndElement;
15119 ctxt->sax->startElementNs = NULL;
15120 ctxt->sax->endElementNs = NULL;
15121 ctxt->sax->initialized = 1;
15122 options -= XML_PARSE_SAX1;
15123 ctxt->options |= XML_PARSE_SAX1;
15124 }
15125 #endif /* LIBXML_SAX1_ENABLED */
15126 if (options & XML_PARSE_NODICT) {
15127 ctxt->dictNames = 0;
15128 options -= XML_PARSE_NODICT;
15129 ctxt->options |= XML_PARSE_NODICT;
15130 } else {
15131 ctxt->dictNames = 1;
15132 }
15133 if (options & XML_PARSE_NOCDATA) {
15134 ctxt->sax->cdataBlock = NULL;
15135 options -= XML_PARSE_NOCDATA;
15136 ctxt->options |= XML_PARSE_NOCDATA;
15137 }
15138 if (options & XML_PARSE_NSCLEAN) {
15139 ctxt->options |= XML_PARSE_NSCLEAN;
15140 options -= XML_PARSE_NSCLEAN;
15141 }
15142 if (options & XML_PARSE_NONET) {
15143 ctxt->options |= XML_PARSE_NONET;
15144 options -= XML_PARSE_NONET;
15145 }
15146 if (options & XML_PARSE_COMPACT) {
15147 ctxt->options |= XML_PARSE_COMPACT;
15148 options -= XML_PARSE_COMPACT;
15149 }
15150 if (options & XML_PARSE_OLD10) {
15151 ctxt->options |= XML_PARSE_OLD10;
15152 options -= XML_PARSE_OLD10;
15153 }
15154 if (options & XML_PARSE_NOBASEFIX) {
15155 ctxt->options |= XML_PARSE_NOBASEFIX;
15156 options -= XML_PARSE_NOBASEFIX;
15157 }
15158 if (options & XML_PARSE_HUGE) {
15159 ctxt->options |= XML_PARSE_HUGE;
15160 options -= XML_PARSE_HUGE;
15161 if (ctxt->dict != NULL)
15162 xmlDictSetLimit(ctxt->dict, 0);
15163 }
15164 if (options & XML_PARSE_OLDSAX) {
15165 ctxt->options |= XML_PARSE_OLDSAX;
15166 options -= XML_PARSE_OLDSAX;
15167 }
15168 if (options & XML_PARSE_IGNORE_ENC) {
15169 ctxt->options |= XML_PARSE_IGNORE_ENC;
15170 options -= XML_PARSE_IGNORE_ENC;
15171 }
15172 if (options & XML_PARSE_BIG_LINES) {
15173 ctxt->options |= XML_PARSE_BIG_LINES;
15174 options -= XML_PARSE_BIG_LINES;
15175 }
15176 ctxt->linenumbers = 1;
15177 return (options);
15178 }
15179
15180 /**
15181 * xmlCtxtUseOptions:
15182 * @ctxt: an XML parser context
15183 * @options: a combination of xmlParserOption
15184 *
15185 * Applies the options to the parser context
15186 *
15187 * Returns 0 in case of success, the set of unknown or unimplemented options
15188 * in case of error.
15189 */
15190 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15191 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15192 {
15193 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15194 }
15195
15196 /**
15197 * xmlDoRead:
15198 * @ctxt: an XML parser context
15199 * @URL: the base URL to use for the document
15200 * @encoding: the document encoding, or NULL
15201 * @options: a combination of xmlParserOption
15202 * @reuse: keep the context for reuse
15203 *
15204 * Common front-end for the xmlRead functions
15205 *
15206 * Returns the resulting document tree or NULL
15207 */
15208 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15209 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15210 int options, int reuse)
15211 {
15212 xmlDocPtr ret;
15213
15214 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15215 if (encoding != NULL) {
15216 xmlCharEncodingHandlerPtr hdlr;
15217
15218 hdlr = xmlFindCharEncodingHandler(encoding);
15219 if (hdlr != NULL)
15220 xmlSwitchToEncoding(ctxt, hdlr);
15221 }
15222 if ((URL != NULL) && (ctxt->input != NULL) &&
15223 (ctxt->input->filename == NULL))
15224 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15225 xmlParseDocument(ctxt);
15226 if ((ctxt->wellFormed) || ctxt->recovery)
15227 ret = ctxt->myDoc;
15228 else {
15229 ret = NULL;
15230 if (ctxt->myDoc != NULL) {
15231 xmlFreeDoc(ctxt->myDoc);
15232 }
15233 }
15234 ctxt->myDoc = NULL;
15235 if (!reuse) {
15236 xmlFreeParserCtxt(ctxt);
15237 }
15238
15239 return (ret);
15240 }
15241
15242 /**
15243 * xmlReadDoc:
15244 * @cur: a pointer to a zero terminated string
15245 * @URL: the base URL to use for the document
15246 * @encoding: the document encoding, or NULL
15247 * @options: a combination of xmlParserOption
15248 *
15249 * parse an XML in-memory document and build a tree.
15250 *
15251 * Returns the resulting document tree
15252 */
15253 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15254 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15255 {
15256 xmlParserCtxtPtr ctxt;
15257
15258 if (cur == NULL)
15259 return (NULL);
15260 xmlInitParser();
15261
15262 ctxt = xmlCreateDocParserCtxt(cur);
15263 if (ctxt == NULL)
15264 return (NULL);
15265 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15266 }
15267
15268 /**
15269 * xmlReadFile:
15270 * @filename: a file or URL
15271 * @encoding: the document encoding, or NULL
15272 * @options: a combination of xmlParserOption
15273 *
15274 * parse an XML file from the filesystem or the network.
15275 *
15276 * Returns the resulting document tree
15277 */
15278 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15279 xmlReadFile(const char *filename, const char *encoding, int options)
15280 {
15281 xmlParserCtxtPtr ctxt;
15282
15283 xmlInitParser();
15284 ctxt = xmlCreateURLParserCtxt(filename, options);
15285 if (ctxt == NULL)
15286 return (NULL);
15287 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15288 }
15289
15290 /**
15291 * xmlReadMemory:
15292 * @buffer: a pointer to a char array
15293 * @size: the size of the array
15294 * @URL: the base URL to use for the document
15295 * @encoding: the document encoding, or NULL
15296 * @options: a combination of xmlParserOption
15297 *
15298 * parse an XML in-memory document and build a tree.
15299 *
15300 * Returns the resulting document tree
15301 */
15302 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15303 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15304 {
15305 xmlParserCtxtPtr ctxt;
15306
15307 xmlInitParser();
15308 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15309 if (ctxt == NULL)
15310 return (NULL);
15311 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15312 }
15313
15314 /**
15315 * xmlReadFd:
15316 * @fd: an open file descriptor
15317 * @URL: the base URL to use for the document
15318 * @encoding: the document encoding, or NULL
15319 * @options: a combination of xmlParserOption
15320 *
15321 * parse an XML from a file descriptor and build a tree.
15322 * NOTE that the file descriptor will not be closed when the
15323 * reader is closed or reset.
15324 *
15325 * Returns the resulting document tree
15326 */
15327 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15328 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15329 {
15330 xmlParserCtxtPtr ctxt;
15331 xmlParserInputBufferPtr input;
15332 xmlParserInputPtr stream;
15333
15334 if (fd < 0)
15335 return (NULL);
15336 xmlInitParser();
15337
15338 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15339 if (input == NULL)
15340 return (NULL);
15341 input->closecallback = NULL;
15342 ctxt = xmlNewParserCtxt();
15343 if (ctxt == NULL) {
15344 xmlFreeParserInputBuffer(input);
15345 return (NULL);
15346 }
15347 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15348 if (stream == NULL) {
15349 xmlFreeParserInputBuffer(input);
15350 xmlFreeParserCtxt(ctxt);
15351 return (NULL);
15352 }
15353 inputPush(ctxt, stream);
15354 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15355 }
15356
15357 /**
15358 * xmlReadIO:
15359 * @ioread: an I/O read function
15360 * @ioclose: an I/O close function
15361 * @ioctx: an I/O handler
15362 * @URL: the base URL to use for the document
15363 * @encoding: the document encoding, or NULL
15364 * @options: a combination of xmlParserOption
15365 *
15366 * parse an XML document from I/O functions and source and build a tree.
15367 *
15368 * Returns the resulting document tree
15369 */
15370 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15371 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15372 void *ioctx, const char *URL, const char *encoding, int options)
15373 {
15374 xmlParserCtxtPtr ctxt;
15375 xmlParserInputBufferPtr input;
15376 xmlParserInputPtr stream;
15377
15378 if (ioread == NULL)
15379 return (NULL);
15380 xmlInitParser();
15381
15382 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15383 XML_CHAR_ENCODING_NONE);
15384 if (input == NULL) {
15385 if (ioclose != NULL)
15386 ioclose(ioctx);
15387 return (NULL);
15388 }
15389 ctxt = xmlNewParserCtxt();
15390 if (ctxt == NULL) {
15391 xmlFreeParserInputBuffer(input);
15392 return (NULL);
15393 }
15394 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15395 if (stream == NULL) {
15396 xmlFreeParserInputBuffer(input);
15397 xmlFreeParserCtxt(ctxt);
15398 return (NULL);
15399 }
15400 inputPush(ctxt, stream);
15401 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15402 }
15403
15404 /**
15405 * xmlCtxtReadDoc:
15406 * @ctxt: an XML parser context
15407 * @cur: a pointer to a zero terminated string
15408 * @URL: the base URL to use for the document
15409 * @encoding: the document encoding, or NULL
15410 * @options: a combination of xmlParserOption
15411 *
15412 * parse an XML in-memory document and build a tree.
15413 * This reuses the existing @ctxt parser context
15414 *
15415 * Returns the resulting document tree
15416 */
15417 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15418 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15419 const char *URL, const char *encoding, int options)
15420 {
15421 xmlParserInputPtr stream;
15422
15423 if (cur == NULL)
15424 return (NULL);
15425 if (ctxt == NULL)
15426 return (NULL);
15427 xmlInitParser();
15428
15429 xmlCtxtReset(ctxt);
15430
15431 stream = xmlNewStringInputStream(ctxt, cur);
15432 if (stream == NULL) {
15433 return (NULL);
15434 }
15435 inputPush(ctxt, stream);
15436 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15437 }
15438
15439 /**
15440 * xmlCtxtReadFile:
15441 * @ctxt: an XML parser context
15442 * @filename: a file or URL
15443 * @encoding: the document encoding, or NULL
15444 * @options: a combination of xmlParserOption
15445 *
15446 * parse an XML file from the filesystem or the network.
15447 * This reuses the existing @ctxt parser context
15448 *
15449 * Returns the resulting document tree
15450 */
15451 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15452 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15453 const char *encoding, int options)
15454 {
15455 xmlParserInputPtr stream;
15456
15457 if (filename == NULL)
15458 return (NULL);
15459 if (ctxt == NULL)
15460 return (NULL);
15461 xmlInitParser();
15462
15463 xmlCtxtReset(ctxt);
15464
15465 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15466 if (stream == NULL) {
15467 return (NULL);
15468 }
15469 inputPush(ctxt, stream);
15470 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15471 }
15472
15473 /**
15474 * xmlCtxtReadMemory:
15475 * @ctxt: an XML parser context
15476 * @buffer: a pointer to a char array
15477 * @size: the size of the array
15478 * @URL: the base URL to use for the document
15479 * @encoding: the document encoding, or NULL
15480 * @options: a combination of xmlParserOption
15481 *
15482 * parse an XML in-memory document and build a tree.
15483 * This reuses the existing @ctxt parser context
15484 *
15485 * Returns the resulting document tree
15486 */
15487 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15488 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15489 const char *URL, const char *encoding, int options)
15490 {
15491 xmlParserInputBufferPtr input;
15492 xmlParserInputPtr stream;
15493
15494 if (ctxt == NULL)
15495 return (NULL);
15496 if (buffer == NULL)
15497 return (NULL);
15498 xmlInitParser();
15499
15500 xmlCtxtReset(ctxt);
15501
15502 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15503 if (input == NULL) {
15504 return(NULL);
15505 }
15506
15507 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15508 if (stream == NULL) {
15509 xmlFreeParserInputBuffer(input);
15510 return(NULL);
15511 }
15512
15513 inputPush(ctxt, stream);
15514 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15515 }
15516
15517 /**
15518 * xmlCtxtReadFd:
15519 * @ctxt: an XML parser context
15520 * @fd: an open file descriptor
15521 * @URL: the base URL to use for the document
15522 * @encoding: the document encoding, or NULL
15523 * @options: a combination of xmlParserOption
15524 *
15525 * parse an XML from a file descriptor and build a tree.
15526 * This reuses the existing @ctxt parser context
15527 * NOTE that the file descriptor will not be closed when the
15528 * reader is closed or reset.
15529 *
15530 * Returns the resulting document tree
15531 */
15532 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15533 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15534 const char *URL, const char *encoding, int options)
15535 {
15536 xmlParserInputBufferPtr input;
15537 xmlParserInputPtr stream;
15538
15539 if (fd < 0)
15540 return (NULL);
15541 if (ctxt == NULL)
15542 return (NULL);
15543 xmlInitParser();
15544
15545 xmlCtxtReset(ctxt);
15546
15547
15548 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15549 if (input == NULL)
15550 return (NULL);
15551 input->closecallback = NULL;
15552 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15553 if (stream == NULL) {
15554 xmlFreeParserInputBuffer(input);
15555 return (NULL);
15556 }
15557 inputPush(ctxt, stream);
15558 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15559 }
15560
15561 /**
15562 * xmlCtxtReadIO:
15563 * @ctxt: an XML parser context
15564 * @ioread: an I/O read function
15565 * @ioclose: an I/O close function
15566 * @ioctx: an I/O handler
15567 * @URL: the base URL to use for the document
15568 * @encoding: the document encoding, or NULL
15569 * @options: a combination of xmlParserOption
15570 *
15571 * parse an XML document from I/O functions and source and build a tree.
15572 * This reuses the existing @ctxt parser context
15573 *
15574 * Returns the resulting document tree
15575 */
15576 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15577 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15578 xmlInputCloseCallback ioclose, void *ioctx,
15579 const char *URL,
15580 const char *encoding, int options)
15581 {
15582 xmlParserInputBufferPtr input;
15583 xmlParserInputPtr stream;
15584
15585 if (ioread == NULL)
15586 return (NULL);
15587 if (ctxt == NULL)
15588 return (NULL);
15589 xmlInitParser();
15590
15591 xmlCtxtReset(ctxt);
15592
15593 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15594 XML_CHAR_ENCODING_NONE);
15595 if (input == NULL) {
15596 if (ioclose != NULL)
15597 ioclose(ioctx);
15598 return (NULL);
15599 }
15600 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15601 if (stream == NULL) {
15602 xmlFreeParserInputBuffer(input);
15603 return (NULL);
15604 }
15605 inputPush(ctxt, stream);
15606 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15607 }
15608
15609 #define bottom_parser
15610 #include "elfgcchack.h"
15611