1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
90 /************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
/*
 * Entity size thresholds. xmlParserEntityCheck never flags a replacement
 * whose size is below XML_PARSER_BIG_ENTITY.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
106
107 /*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119 {
120 unsigned long consumed = 0;
121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175 }
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to
 * process. This is a safety boundary feature rather than a limitation
 * of the parser, and it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256;
186
187
188
/* Miscellaneous parser constants */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
202
203
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
207
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
211 void *user_data, int depth, const xmlChar *URL,
212 const xmlChar *ID, xmlNodePtr *list);
213
214 static int
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
218 static void
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
222
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
226
227 static int
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
230 /************************************************************************
231 * *
232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236 /**
237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247 {
248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
253
254 if (prefix == NULL)
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
259 else
260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
270 }
271
272 /**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
282 {
283 const char *errmsg;
284
285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
292 case XML_ERR_INVALID_DEC_CHARREF:
293 errmsg = "CharRef: invalid decimal value\n";
294 break;
295 case XML_ERR_INVALID_CHARREF:
296 errmsg = "CharRef: invalid value\n";
297 break;
298 case XML_ERR_INTERNAL_ERROR:
299 errmsg = "internal error";
300 break;
301 case XML_ERR_PEREF_AT_EOF:
302 errmsg = "PEReference at end of document\n";
303 break;
304 case XML_ERR_PEREF_IN_PROLOG:
305 errmsg = "PEReference in prolog\n";
306 break;
307 case XML_ERR_PEREF_IN_EPILOG:
308 errmsg = "PEReference in epilog\n";
309 break;
310 case XML_ERR_PEREF_NO_NAME:
311 errmsg = "PEReference: no name\n";
312 break;
313 case XML_ERR_PEREF_SEMICOL_MISSING:
314 errmsg = "PEReference: expecting ';'\n";
315 break;
316 case XML_ERR_ENTITY_LOOP:
317 errmsg = "Detected an entity reference loop\n";
318 break;
319 case XML_ERR_ENTITY_NOT_STARTED:
320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
322 case XML_ERR_ENTITY_PE_INTERNAL:
323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
325 case XML_ERR_ENTITY_NOT_FINISHED:
326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
329 errmsg = "AttValue: \" or ' expected\n";
330 break;
331 case XML_ERR_LT_IN_ATTRIBUTE:
332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
334 case XML_ERR_LITERAL_NOT_STARTED:
335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
337 case XML_ERR_LITERAL_NOT_FINISHED:
338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
340 case XML_ERR_MISPLACED_CDATA_END:
341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
343 case XML_ERR_URI_REQUIRED:
344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
346 case XML_ERR_PUBID_REQUIRED:
347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
349 case XML_ERR_HYPHEN_IN_COMMENT:
350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
352 case XML_ERR_PI_NOT_STARTED:
353 errmsg = "xmlParsePI : no target name\n";
354 break;
355 case XML_ERR_RESERVED_XML_NAME:
356 errmsg = "Invalid PI name\n";
357 break;
358 case XML_ERR_NOTATION_NOT_STARTED:
359 errmsg = "NOTATION: Name expected here\n";
360 break;
361 case XML_ERR_NOTATION_NOT_FINISHED:
362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
364 case XML_ERR_VALUE_REQUIRED:
365 errmsg = "Entity value required\n";
366 break;
367 case XML_ERR_URI_FRAGMENT:
368 errmsg = "Fragment not allowed";
369 break;
370 case XML_ERR_ATTLIST_NOT_STARTED:
371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
373 case XML_ERR_NMTOKEN_REQUIRED:
374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
376 case XML_ERR_ATTLIST_NOT_FINISHED:
377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
379 case XML_ERR_MIXED_NOT_STARTED:
380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
382 case XML_ERR_PCDATA_REQUIRED:
383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
391 case XML_ERR_PEREF_IN_INT_SUBSET:
392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
395 case XML_ERR_GT_REQUIRED:
396 errmsg = "expected '>'\n";
397 break;
398 case XML_ERR_CONDSEC_INVALID:
399 errmsg = "XML conditional section '[' expected\n";
400 break;
401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
408 case XML_ERR_CONDSEC_NOT_FINISHED:
409 errmsg = "XML conditional section not closed\n";
410 break;
411 case XML_ERR_XMLDECL_NOT_STARTED:
412 errmsg = "Text declaration '<?xml' required\n";
413 break;
414 case XML_ERR_XMLDECL_NOT_FINISHED:
415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
417 case XML_ERR_EXT_ENTITY_STANDALONE:
418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421 errmsg = "EntityRef: expecting ';'\n";
422 break;
423 case XML_ERR_DOCTYPE_NOT_FINISHED:
424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
426 case XML_ERR_LTSLASH_REQUIRED:
427 errmsg = "EndTag: '</' not found\n";
428 break;
429 case XML_ERR_EQUAL_REQUIRED:
430 errmsg = "expected '='\n";
431 break;
432 case XML_ERR_STRING_NOT_CLOSED:
433 errmsg = "String not closed expecting \" or '\n";
434 break;
435 case XML_ERR_STRING_NOT_STARTED:
436 errmsg = "String not started expecting ' or \"\n";
437 break;
438 case XML_ERR_ENCODING_NAME:
439 errmsg = "Invalid XML encoding name\n";
440 break;
441 case XML_ERR_STANDALONE_VALUE:
442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
444 case XML_ERR_DOCUMENT_EMPTY:
445 errmsg = "Document is empty\n";
446 break;
447 case XML_ERR_DOCUMENT_END:
448 errmsg = "Extra content at the end of the document\n";
449 break;
450 case XML_ERR_NOT_WELL_BALANCED:
451 errmsg = "chunk is not well balanced\n";
452 break;
453 case XML_ERR_EXTRA_CONTENT:
454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
456 case XML_ERR_VERSION_MISSING:
457 errmsg = "Malformed declaration expecting version\n";
458 break;
459 #if 0
460 case:
461 errmsg = "\n";
462 break;
463 #endif
464 default:
465 errmsg = "Unregistered error message\n";
466 }
467 if (ctxt != NULL)
468 ctxt->errNo = error;
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
477 }
478
479 /**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
490 {
491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
494 if (ctxt != NULL)
495 ctxt->errNo = error;
496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
503 }
504
505 /**
506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518 {
519 xmlStructuredErrorFunc schannel = NULL;
520
521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
526 schannel = ctxt->sax->serror;
527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
542 }
543
544 /**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
551 * Handle a validity error.
552 */
553 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555 const char *msg, const xmlChar *str1, const xmlChar *str2)
556 {
557 xmlStructuredErrorFunc schannel = NULL;
558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
569 ctxt->vctxt.error, ctxt->vctxt.userData,
570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
574 ctxt->valid = 0;
575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
581 }
582 }
583
584 /**
585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, int val)
596 {
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL,
603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
610 }
611
612 /**
613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627 {
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL)
632 ctxt->errNo = error;
633 __xmlRaiseError(NULL, NULL, NULL,
634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
642 }
643
644 /**
645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655 const char *msg, const xmlChar * val)
656 {
657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
660 if (ctxt != NULL)
661 ctxt->errNo = error;
662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
671 }
672
673 /**
674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685 {
686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
689 if (ctxt != NULL)
690 ctxt->errNo = error;
691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695 }
696
697 /**
698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
712 {
713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
716 if (ctxt != NULL)
717 ctxt->errNo = error;
718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
724 }
725
726 /**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741 {
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749 }
750
751 /************************************************************************
752 * *
753 * Library wide options *
754 * *
755 ************************************************************************/
756
757 /**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767 int
xmlHasFeature(xmlFeature feature)768 xmlHasFeature(xmlFeature feature)
769 {
770 switch (feature) {
771 case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
773 return(1);
774 #else
775 return(0);
776 #endif
777 case XML_WITH_TREE:
778 #ifdef LIBXML_TREE_ENABLED
779 return(1);
780 #else
781 return(0);
782 #endif
783 case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786 #else
787 return(0);
788 #endif
789 case XML_WITH_PUSH:
790 #ifdef LIBXML_PUSH_ENABLED
791 return(1);
792 #else
793 return(0);
794 #endif
795 case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
797 return(1);
798 #else
799 return(0);
800 #endif
801 case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804 #else
805 return(0);
806 #endif
807 case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
809 return(1);
810 #else
811 return(0);
812 #endif
813 case XML_WITH_SAX1:
814 #ifdef LIBXML_SAX1_ENABLED
815 return(1);
816 #else
817 return(0);
818 #endif
819 case XML_WITH_FTP:
820 #ifdef LIBXML_FTP_ENABLED
821 return(1);
822 #else
823 return(0);
824 #endif
825 case XML_WITH_HTTP:
826 #ifdef LIBXML_HTTP_ENABLED
827 return(1);
828 #else
829 return(0);
830 #endif
831 case XML_WITH_VALID:
832 #ifdef LIBXML_VALID_ENABLED
833 return(1);
834 #else
835 return(0);
836 #endif
837 case XML_WITH_HTML:
838 #ifdef LIBXML_HTML_ENABLED
839 return(1);
840 #else
841 return(0);
842 #endif
843 case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846 #else
847 return(0);
848 #endif
849 case XML_WITH_C14N:
850 #ifdef LIBXML_C14N_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_XPATH:
862 #ifdef LIBXML_XPATH_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_XPTR:
868 #ifdef LIBXML_XPTR_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_ICONV:
880 #ifdef LIBXML_ICONV_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_EXPR:
910 #ifdef LIBXML_EXPR_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_DEBUG:
934 #ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_ZLIB:
952 #ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_ICU:
958 #ifdef LIBXML_ICU_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 default:
964 break;
965 }
966 return(0);
967 }
968
969 /************************************************************************
970 * *
971 * SAX2 defaulted attributes handling *
972 * *
973 ************************************************************************/
974
975 /**
976 * xmlDetectSAX2:
977 * @ctxt: an XML parser context
978 *
979 * Do the SAX2 detection and specific intialization
980 */
981 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983 if (ctxt == NULL) return;
984 #ifdef LIBXML_SAX1_ENABLED
985 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986 ((ctxt->sax->startElementNs != NULL) ||
987 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
988 #else
989 ctxt->sax2 = 1;
990 #endif /* LIBXML_SAX1_ENABLED */
991
992 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996 (ctxt->str_xml_ns == NULL)) {
997 xmlErrMemory(ctxt, NULL);
998 }
999 }
1000
1001 typedef struct _xmlDefAttrs xmlDefAttrs;
1002 typedef xmlDefAttrs *xmlDefAttrsPtr;
1003 struct _xmlDefAttrs {
1004 int nbAttrs; /* number of defaulted attributes on that element */
1005 int maxAttrs; /* the size of the array */
1006 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1007 };
1008
1009 /**
1010 * xmlAttrNormalizeSpace:
1011 * @src: the source string
1012 * @dst: the target string
1013 *
1014 * Normalize the space in non CDATA attribute values:
1015 * If the attribute type is not CDATA, then the XML processor MUST further
1016 * process the normalized attribute value by discarding any leading and
1017 * trailing space (#x20) characters, and by replacing sequences of space
1018 * (#x20) characters by a single space (#x20) character.
1019 * Note that the size of dst need to be at least src, and if one doesn't need
1020 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021 * passing src as dst is just fine.
1022 *
1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024 * is needed.
1025 */
1026 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028 {
1029 if ((src == NULL) || (dst == NULL))
1030 return(NULL);
1031
1032 while (*src == 0x20) src++;
1033 while (*src != 0) {
1034 if (*src == 0x20) {
1035 while (*src == 0x20) src++;
1036 if (*src != 0)
1037 *dst++ = 0x20;
1038 } else {
1039 *dst++ = *src++;
1040 }
1041 }
1042 *dst = 0;
1043 if (dst == src)
1044 return(NULL);
1045 return(dst);
1046 }
1047
1048 /**
1049 * xmlAttrNormalizeSpace2:
1050 * @src: the source string
1051 *
1052 * Normalize the space in non CDATA attribute values, a slightly more complex
1053 * front end to avoid allocation problems when running on attribute values
1054 * coming from the input.
1055 *
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057 * is needed.
1058 */
1059 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1061 {
1062 int i;
1063 int remove_head = 0;
1064 int need_realloc = 0;
1065 const xmlChar *cur;
1066
1067 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068 return(NULL);
1069 i = *len;
1070 if (i <= 0)
1071 return(NULL);
1072
1073 cur = src;
1074 while (*cur == 0x20) {
1075 cur++;
1076 remove_head++;
1077 }
1078 while (*cur != 0) {
1079 if (*cur == 0x20) {
1080 cur++;
1081 if ((*cur == 0x20) || (*cur == 0)) {
1082 need_realloc = 1;
1083 break;
1084 }
1085 } else
1086 cur++;
1087 }
1088 if (need_realloc) {
1089 xmlChar *ret;
1090
1091 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092 if (ret == NULL) {
1093 xmlErrMemory(ctxt, NULL);
1094 return(NULL);
1095 }
1096 xmlAttrNormalizeSpace(ret, ret);
1097 *len = (int) strlen((const char *)ret);
1098 return(ret);
1099 } else if (remove_head) {
1100 *len -= remove_head;
1101 memmove(src, src + remove_head, 1 + *len);
1102 return(src);
1103 }
1104 return(NULL);
1105 }
1106
1107 /**
1108 * xmlAddDefAttrs:
1109 * @ctxt: an XML parser context
1110 * @fullname: the element fullname
1111 * @fullattr: the attribute fullname
1112 * @value: the attribute value
1113 *
1114 * Add a defaulted attribute for an element
1115 */
1116 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118 const xmlChar *fullname,
1119 const xmlChar *fullattr,
1120 const xmlChar *value) {
1121 xmlDefAttrsPtr defaults;
1122 int len;
1123 const xmlChar *name;
1124 const xmlChar *prefix;
1125
1126 /*
1127 * Allows to detect attribute redefinitions
1128 */
1129 if (ctxt->attsSpecial != NULL) {
1130 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131 return;
1132 }
1133
1134 if (ctxt->attsDefault == NULL) {
1135 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136 if (ctxt->attsDefault == NULL)
1137 goto mem_error;
1138 }
1139
1140 /*
1141 * split the element name into prefix:localname , the string found
1142 * are within the DTD and then not associated to namespace names.
1143 */
1144 name = xmlSplitQName3(fullname, &len);
1145 if (name == NULL) {
1146 name = xmlDictLookup(ctxt->dict, fullname, -1);
1147 prefix = NULL;
1148 } else {
1149 name = xmlDictLookup(ctxt->dict, name, -1);
1150 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151 }
1152
1153 /*
1154 * make sure there is some storage
1155 */
1156 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157 if (defaults == NULL) {
1158 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159 (4 * 5) * sizeof(const xmlChar *));
1160 if (defaults == NULL)
1161 goto mem_error;
1162 defaults->nbAttrs = 0;
1163 defaults->maxAttrs = 4;
1164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 defaults, NULL) < 0) {
1166 xmlFree(defaults);
1167 goto mem_error;
1168 }
1169 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170 xmlDefAttrsPtr temp;
1171
1172 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174 if (temp == NULL)
1175 goto mem_error;
1176 defaults = temp;
1177 defaults->maxAttrs *= 2;
1178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1180 xmlFree(defaults);
1181 goto mem_error;
1182 }
1183 }
1184
1185 /*
1186 * Split the element name into prefix:localname , the string found
1187 * are within the DTD and hen not associated to namespace names.
1188 */
1189 name = xmlSplitQName3(fullattr, &len);
1190 if (name == NULL) {
1191 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192 prefix = NULL;
1193 } else {
1194 name = xmlDictLookup(ctxt->dict, name, -1);
1195 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196 }
1197
1198 defaults->values[5 * defaults->nbAttrs] = name;
1199 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200 /* intern the string and precompute the end */
1201 len = xmlStrlen(value);
1202 value = xmlDictLookup(ctxt->dict, value, len);
1203 defaults->values[5 * defaults->nbAttrs + 2] = value;
1204 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205 if (ctxt->external)
1206 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207 else
1208 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209 defaults->nbAttrs++;
1210
1211 return;
1212
1213 mem_error:
1214 xmlErrMemory(ctxt, NULL);
1215 return;
1216 }
1217
1218 /**
1219 * xmlAddSpecialAttr:
1220 * @ctxt: an XML parser context
1221 * @fullname: the element fullname
1222 * @fullattr: the attribute fullname
1223 * @type: the attribute type
1224 *
1225 * Register this attribute type
1226 */
1227 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 const xmlChar *fullname,
1230 const xmlChar *fullattr,
1231 int type)
1232 {
1233 if (ctxt->attsSpecial == NULL) {
1234 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235 if (ctxt->attsSpecial == NULL)
1236 goto mem_error;
1237 }
1238
1239 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240 return;
1241
1242 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243 (void *) (long) type);
1244 return;
1245
1246 mem_error:
1247 xmlErrMemory(ctxt, NULL);
1248 return;
1249 }
1250
1251 /**
1252 * xmlCleanSpecialAttrCallback:
1253 *
1254 * Removes CDATA attributes from the special attribute table
1255 */
1256 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258 const xmlChar *fullname, const xmlChar *fullattr,
1259 const xmlChar *unused ATTRIBUTE_UNUSED) {
1260 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261
1262 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264 }
1265 }
1266
1267 /**
1268 * xmlCleanSpecialAttr:
1269 * @ctxt: an XML parser context
1270 *
1271 * Trim the list of attributes defined to remove all those of type
1272 * CDATA as they are not special. This call should be done when finishing
1273 * to parse the DTD and before starting to parse the document root.
1274 */
1275 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277 {
1278 if (ctxt->attsSpecial == NULL)
1279 return;
1280
1281 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282
1283 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284 xmlHashFree(ctxt->attsSpecial, NULL);
1285 ctxt->attsSpecial = NULL;
1286 }
1287 return;
1288 }
1289
1290 /**
1291 * xmlCheckLanguageID:
1292 * @lang: pointer to the string value
1293 *
1294 * Checks that the value conforms to the LanguageID production:
1295 *
1296 * NOTE: this is somewhat deprecated, those productions were removed from
1297 * the XML Second edition.
1298 *
1299 * [33] LanguageID ::= Langcode ('-' Subcode)*
1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304 * [38] Subcode ::= ([a-z] | [A-Z])+
1305 *
1306 * The current REC reference the sucessors of RFC 1766, currently 5646
1307 *
1308 * http://www.rfc-editor.org/rfc/rfc5646.txt
1309 * langtag = language
1310 * ["-" script]
1311 * ["-" region]
1312 * *("-" variant)
1313 * *("-" extension)
1314 * ["-" privateuse]
1315 * language = 2*3ALPHA ; shortest ISO 639 code
1316 * ["-" extlang] ; sometimes followed by
1317 * ; extended language subtags
1318 * / 4ALPHA ; or reserved for future use
1319 * / 5*8ALPHA ; or registered language subtag
1320 *
1321 * extlang = 3ALPHA ; selected ISO 639 codes
1322 * *2("-" 3ALPHA) ; permanently reserved
1323 *
1324 * script = 4ALPHA ; ISO 15924 code
1325 *
1326 * region = 2ALPHA ; ISO 3166-1 code
1327 * / 3DIGIT ; UN M.49 code
1328 *
1329 * variant = 5*8alphanum ; registered variants
1330 * / (DIGIT 3alphanum)
1331 *
1332 * extension = singleton 1*("-" (2*8alphanum))
1333 *
1334 * ; Single alphanumerics
1335 * ; "x" reserved for private use
1336 * singleton = DIGIT ; 0 - 9
1337 * / %x41-57 ; A - W
1338 * / %x59-5A ; Y - Z
1339 * / %x61-77 ; a - w
1340 * / %x79-7A ; y - z
1341 *
1342 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343 * The parser below doesn't try to cope with extension or privateuse
1344 * that could be added but that's not interoperable anyway
1345 *
1346 * Returns 1 if correct 0 otherwise
1347 **/
1348 int
xmlCheckLanguageID(const xmlChar * lang)1349 xmlCheckLanguageID(const xmlChar * lang)
1350 {
1351 const xmlChar *cur = lang, *nxt;
1352
1353 if (cur == NULL)
1354 return (0);
1355 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1356 ((cur[0] == 'I') && (cur[1] == '-')) ||
1357 ((cur[0] == 'x') && (cur[1] == '-')) ||
1358 ((cur[0] == 'X') && (cur[1] == '-'))) {
1359 /*
1360 * Still allow IANA code and user code which were coming
1361 * from the previous version of the XML-1.0 specification
1362 * it's deprecated but we should not fail
1363 */
1364 cur += 2;
1365 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1366 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1367 cur++;
1368 return(cur[0] == 0);
1369 }
1370 nxt = cur;
1371 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373 nxt++;
1374 if (nxt - cur >= 4) {
1375 /*
1376 * Reserved
1377 */
1378 if ((nxt - cur > 8) || (nxt[0] != 0))
1379 return(0);
1380 return(1);
1381 }
1382 if (nxt - cur < 2)
1383 return(0);
1384 /* we got an ISO 639 code */
1385 if (nxt[0] == 0)
1386 return(1);
1387 if (nxt[0] != '-')
1388 return(0);
1389
1390 nxt++;
1391 cur = nxt;
1392 /* now we can have extlang or script or region or variant */
1393 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1394 goto region_m49;
1395
1396 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398 nxt++;
1399 if (nxt - cur == 4)
1400 goto script;
1401 if (nxt - cur == 2)
1402 goto region;
1403 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1404 goto variant;
1405 if (nxt - cur != 3)
1406 return(0);
1407 /* we parsed an extlang */
1408 if (nxt[0] == 0)
1409 return(1);
1410 if (nxt[0] != '-')
1411 return(0);
1412
1413 nxt++;
1414 cur = nxt;
1415 /* now we can have script or region or variant */
1416 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1417 goto region_m49;
1418
1419 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1421 nxt++;
1422 if (nxt - cur == 2)
1423 goto region;
1424 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1425 goto variant;
1426 if (nxt - cur != 4)
1427 return(0);
1428 /* we parsed a script */
1429 script:
1430 if (nxt[0] == 0)
1431 return(1);
1432 if (nxt[0] != '-')
1433 return(0);
1434
1435 nxt++;
1436 cur = nxt;
1437 /* now we can have region or variant */
1438 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1439 goto region_m49;
1440
1441 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1443 nxt++;
1444
1445 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1446 goto variant;
1447 if (nxt - cur != 2)
1448 return(0);
1449 /* we parsed a region */
1450 region:
1451 if (nxt[0] == 0)
1452 return(1);
1453 if (nxt[0] != '-')
1454 return(0);
1455
1456 nxt++;
1457 cur = nxt;
1458 /* now we can just have a variant */
1459 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1461 nxt++;
1462
1463 if ((nxt - cur < 5) || (nxt - cur > 8))
1464 return(0);
1465
1466 /* we parsed a variant */
1467 variant:
1468 if (nxt[0] == 0)
1469 return(1);
1470 if (nxt[0] != '-')
1471 return(0);
1472 /* extensions and private use subtags not checked */
1473 return (1);
1474
1475 region_m49:
1476 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1478 nxt += 3;
1479 goto region;
1480 }
1481 return(0);
1482 }
1483
1484 /************************************************************************
1485 * *
1486 * Parser stacks related functions and macros *
1487 * *
1488 ************************************************************************/
1489
1490 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1491 const xmlChar ** str);
1492
1493 #ifdef SAX2
1494 /**
1495 * nsPush:
1496 * @ctxt: an XML parser context
1497 * @prefix: the namespace prefix or NULL
1498 * @URL: the namespace name
1499 *
1500 * Pushes a new parser namespace on top of the ns stack
1501 *
1502 * Returns -1 in case of error, -2 if the namespace should be discarded
1503 * and the index in the stack otherwise.
1504 */
1505 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1506 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1507 {
1508 if (ctxt->options & XML_PARSE_NSCLEAN) {
1509 int i;
1510 for (i = 0;i < ctxt->nsNr;i += 2) {
1511 if (ctxt->nsTab[i] == prefix) {
1512 /* in scope */
1513 if (ctxt->nsTab[i + 1] == URL)
1514 return(-2);
1515 /* out of scope keep it */
1516 break;
1517 }
1518 }
1519 }
1520 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1521 ctxt->nsMax = 10;
1522 ctxt->nsNr = 0;
1523 ctxt->nsTab = (const xmlChar **)
1524 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525 if (ctxt->nsTab == NULL) {
1526 xmlErrMemory(ctxt, NULL);
1527 ctxt->nsMax = 0;
1528 return (-1);
1529 }
1530 } else if (ctxt->nsNr >= ctxt->nsMax) {
1531 const xmlChar ** tmp;
1532 ctxt->nsMax *= 2;
1533 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1535 if (tmp == NULL) {
1536 xmlErrMemory(ctxt, NULL);
1537 ctxt->nsMax /= 2;
1538 return (-1);
1539 }
1540 ctxt->nsTab = tmp;
1541 }
1542 ctxt->nsTab[ctxt->nsNr++] = prefix;
1543 ctxt->nsTab[ctxt->nsNr++] = URL;
1544 return (ctxt->nsNr);
1545 }
1546 /**
1547 * nsPop:
1548 * @ctxt: an XML parser context
1549 * @nr: the number to pop
1550 *
1551 * Pops the top @nr parser prefix/namespace from the ns stack
1552 *
1553 * Returns the number of namespaces removed
1554 */
1555 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1556 nsPop(xmlParserCtxtPtr ctxt, int nr)
1557 {
1558 int i;
1559
1560 if (ctxt->nsTab == NULL) return(0);
1561 if (ctxt->nsNr < nr) {
1562 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1563 nr = ctxt->nsNr;
1564 }
1565 if (ctxt->nsNr <= 0)
1566 return (0);
1567
1568 for (i = 0;i < nr;i++) {
1569 ctxt->nsNr--;
1570 ctxt->nsTab[ctxt->nsNr] = NULL;
1571 }
1572 return(nr);
1573 }
1574 #endif
1575
1576 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1577 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578 const xmlChar **atts;
1579 int *attallocs;
1580 int maxatts;
1581
1582 if (ctxt->atts == NULL) {
1583 maxatts = 55; /* allow for 10 attrs by default */
1584 atts = (const xmlChar **)
1585 xmlMalloc(maxatts * sizeof(xmlChar *));
1586 if (atts == NULL) goto mem_error;
1587 ctxt->atts = atts;
1588 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589 if (attallocs == NULL) goto mem_error;
1590 ctxt->attallocs = attallocs;
1591 ctxt->maxatts = maxatts;
1592 } else if (nr + 5 > ctxt->maxatts) {
1593 maxatts = (nr + 5) * 2;
1594 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595 maxatts * sizeof(const xmlChar *));
1596 if (atts == NULL) goto mem_error;
1597 ctxt->atts = atts;
1598 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599 (maxatts / 5) * sizeof(int));
1600 if (attallocs == NULL) goto mem_error;
1601 ctxt->attallocs = attallocs;
1602 ctxt->maxatts = maxatts;
1603 }
1604 return(ctxt->maxatts);
1605 mem_error:
1606 xmlErrMemory(ctxt, NULL);
1607 return(-1);
1608 }
1609
1610 /**
1611 * inputPush:
1612 * @ctxt: an XML parser context
1613 * @value: the parser input
1614 *
1615 * Pushes a new parser input on top of the input stack
1616 *
1617 * Returns -1 in case of error, the index in the stack otherwise
1618 */
1619 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1620 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1621 {
1622 if ((ctxt == NULL) || (value == NULL))
1623 return(-1);
1624 if (ctxt->inputNr >= ctxt->inputMax) {
1625 ctxt->inputMax *= 2;
1626 ctxt->inputTab =
1627 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1628 ctxt->inputMax *
1629 sizeof(ctxt->inputTab[0]));
1630 if (ctxt->inputTab == NULL) {
1631 xmlErrMemory(ctxt, NULL);
1632 xmlFreeInputStream(value);
1633 ctxt->inputMax /= 2;
1634 value = NULL;
1635 return (-1);
1636 }
1637 }
1638 ctxt->inputTab[ctxt->inputNr] = value;
1639 ctxt->input = value;
1640 return (ctxt->inputNr++);
1641 }
1642 /**
1643 * inputPop:
1644 * @ctxt: an XML parser context
1645 *
1646 * Pops the top parser input from the input stack
1647 *
1648 * Returns the input just removed
1649 */
1650 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1651 inputPop(xmlParserCtxtPtr ctxt)
1652 {
1653 xmlParserInputPtr ret;
1654
1655 if (ctxt == NULL)
1656 return(NULL);
1657 if (ctxt->inputNr <= 0)
1658 return (NULL);
1659 ctxt->inputNr--;
1660 if (ctxt->inputNr > 0)
1661 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1662 else
1663 ctxt->input = NULL;
1664 ret = ctxt->inputTab[ctxt->inputNr];
1665 ctxt->inputTab[ctxt->inputNr] = NULL;
1666 return (ret);
1667 }
1668 /**
1669 * nodePush:
1670 * @ctxt: an XML parser context
1671 * @value: the element node
1672 *
1673 * Pushes a new element node on top of the node stack
1674 *
1675 * Returns -1 in case of error, the index in the stack otherwise
1676 */
1677 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1678 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1679 {
1680 if (ctxt == NULL) return(0);
1681 if (ctxt->nodeNr >= ctxt->nodeMax) {
1682 xmlNodePtr *tmp;
1683
1684 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1685 ctxt->nodeMax * 2 *
1686 sizeof(ctxt->nodeTab[0]));
1687 if (tmp == NULL) {
1688 xmlErrMemory(ctxt, NULL);
1689 return (-1);
1690 }
1691 ctxt->nodeTab = tmp;
1692 ctxt->nodeMax *= 2;
1693 }
1694 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1696 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1697 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1698 xmlParserMaxDepth);
1699 ctxt->instate = XML_PARSER_EOF;
1700 return(-1);
1701 }
1702 ctxt->nodeTab[ctxt->nodeNr] = value;
1703 ctxt->node = value;
1704 return (ctxt->nodeNr++);
1705 }
1706
1707 /**
1708 * nodePop:
1709 * @ctxt: an XML parser context
1710 *
1711 * Pops the top element node from the node stack
1712 *
1713 * Returns the node just removed
1714 */
1715 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1716 nodePop(xmlParserCtxtPtr ctxt)
1717 {
1718 xmlNodePtr ret;
1719
1720 if (ctxt == NULL) return(NULL);
1721 if (ctxt->nodeNr <= 0)
1722 return (NULL);
1723 ctxt->nodeNr--;
1724 if (ctxt->nodeNr > 0)
1725 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1726 else
1727 ctxt->node = NULL;
1728 ret = ctxt->nodeTab[ctxt->nodeNr];
1729 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1730 return (ret);
1731 }
1732
1733 #ifdef LIBXML_PUSH_ENABLED
1734 /**
1735 * nameNsPush:
1736 * @ctxt: an XML parser context
1737 * @value: the element name
1738 * @prefix: the element prefix
1739 * @URI: the element namespace name
1740 *
1741 * Pushes a new element name/prefix/URL on top of the name stack
1742 *
1743 * Returns -1 in case of error, the index in the stack otherwise
1744 */
1745 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1746 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1748 {
1749 if (ctxt->nameNr >= ctxt->nameMax) {
1750 const xmlChar * *tmp;
1751 void **tmp2;
1752 ctxt->nameMax *= 2;
1753 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1754 ctxt->nameMax *
1755 sizeof(ctxt->nameTab[0]));
1756 if (tmp == NULL) {
1757 ctxt->nameMax /= 2;
1758 goto mem_error;
1759 }
1760 ctxt->nameTab = tmp;
1761 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1762 ctxt->nameMax * 3 *
1763 sizeof(ctxt->pushTab[0]));
1764 if (tmp2 == NULL) {
1765 ctxt->nameMax /= 2;
1766 goto mem_error;
1767 }
1768 ctxt->pushTab = tmp2;
1769 }
1770 ctxt->nameTab[ctxt->nameNr] = value;
1771 ctxt->name = value;
1772 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1774 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1775 return (ctxt->nameNr++);
1776 mem_error:
1777 xmlErrMemory(ctxt, NULL);
1778 return (-1);
1779 }
1780 /**
1781 * nameNsPop:
1782 * @ctxt: an XML parser context
1783 *
1784 * Pops the top element/prefix/URI name from the name stack
1785 *
1786 * Returns the name just removed
1787 */
1788 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1789 nameNsPop(xmlParserCtxtPtr ctxt)
1790 {
1791 const xmlChar *ret;
1792
1793 if (ctxt->nameNr <= 0)
1794 return (NULL);
1795 ctxt->nameNr--;
1796 if (ctxt->nameNr > 0)
1797 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1798 else
1799 ctxt->name = NULL;
1800 ret = ctxt->nameTab[ctxt->nameNr];
1801 ctxt->nameTab[ctxt->nameNr] = NULL;
1802 return (ret);
1803 }
1804 #endif /* LIBXML_PUSH_ENABLED */
1805
1806 /**
1807 * namePush:
1808 * @ctxt: an XML parser context
1809 * @value: the element name
1810 *
1811 * Pushes a new element name on top of the name stack
1812 *
1813 * Returns -1 in case of error, the index in the stack otherwise
1814 */
1815 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1816 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1817 {
1818 if (ctxt == NULL) return (-1);
1819
1820 if (ctxt->nameNr >= ctxt->nameMax) {
1821 const xmlChar * *tmp;
1822 ctxt->nameMax *= 2;
1823 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1824 ctxt->nameMax *
1825 sizeof(ctxt->nameTab[0]));
1826 if (tmp == NULL) {
1827 ctxt->nameMax /= 2;
1828 goto mem_error;
1829 }
1830 ctxt->nameTab = tmp;
1831 }
1832 ctxt->nameTab[ctxt->nameNr] = value;
1833 ctxt->name = value;
1834 return (ctxt->nameNr++);
1835 mem_error:
1836 xmlErrMemory(ctxt, NULL);
1837 return (-1);
1838 }
1839 /**
1840 * namePop:
1841 * @ctxt: an XML parser context
1842 *
1843 * Pops the top element name from the name stack
1844 *
1845 * Returns the name just removed
1846 */
1847 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1848 namePop(xmlParserCtxtPtr ctxt)
1849 {
1850 const xmlChar *ret;
1851
1852 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1853 return (NULL);
1854 ctxt->nameNr--;
1855 if (ctxt->nameNr > 0)
1856 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1857 else
1858 ctxt->name = NULL;
1859 ret = ctxt->nameTab[ctxt->nameNr];
1860 ctxt->nameTab[ctxt->nameNr] = NULL;
1861 return (ret);
1862 }
1863
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and makes ctxt->space point to
 * it. The stack is doubled when full.
 *
 * Returns -1 in case of allocation error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grownTab;
        int grown = ctxt->spaceMax * 2;

        grownTab = (int *) xmlRealloc(ctxt->spaceTab,
                                      grown * sizeof(ctxt->spaceTab[0]));
        if (grownTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grownTab;
        ctxt->spaceMax = grown;
    }
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
1882
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * value below it, or to the first slot of the table when the stack gets
 * empty, and the slot just freed is reset to -1.
 *
 * Returns the value just removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0) return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1895
1896 /*
1897 * Macros for accessing the content. Those should be used only by the parser,
1898 * and not exported.
1899 *
1900 * Dirty macros, i.e. one often need to make assumption on the context to
1901 * use them
1902 *
1903 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1904 * To be used with extreme caution since operations consuming
1905 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
1918 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1919 *
1920 * NEXT Skip to the next character, this does the proper decoding
1921 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1922 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1923 * CUR_CHAR(l) returns the current unicode character (int), set l
1924 * to the number of xmlChars used for the encoding [0-5].
1925 * CUR_SCHAR same but operate on a string instead of the context
1926 * COPY_BUF copy the current unicode char to the target buffer, increment
1927 * the index
1928 * GROW, SHRINK handling of input buffers
1929 */
1930
/*
 * Direct access to the current input of the parser context. All of them
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes found at s are c1 .. cn,
 * compared left to right so that the comparison stops at the first
 * mismatch. Every argument is parenthesized in the expansion so that an
 * expression can be given for s or for any of the characters.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1953
/*
 * SKIP(val): move the current input val bytes forward, the byte counter
 * and the column along with it (the line number is not touched), then
 * hand over to xmlParserHandlePEReference() if the new current byte is a
 * '%', and pop the input if it is exhausted and cannot be grown.
 * val is evaluated more than once.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP() but goes byte by byte in order to keep line
 * and column right when newlines are skipped. val is parenthesized in the
 * loop test so that an expression can be given; it is evaluated on every
 * iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1976
1977 #define SHRINK if ((ctxt->progressive == 0) && \
1978 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1980 xmlSHRINK (ctxt);
1981
xmlSHRINK(xmlParserCtxtPtr ctxt)1982 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983 xmlParserInputShrink(ctxt->input);
1984 if ((*ctxt->input->cur == 0) &&
1985 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1986 xmlPopInput(ctxt);
1987 }
1988
1989 #define GROW if ((ctxt->progressive == 0) && \
1990 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1991 xmlGROW (ctxt);
1992
xmlGROW(xmlParserCtxtPtr ctxt)1993 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1995 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1996 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1997 xmlPopInput(ctxt);
1998 }
1999
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, updating column and byte counter, and try to
 * grow the input if a zero byte is reached. The expansion is a brace
 * block, kept as is since its call sites are not all visible from here.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, l bytes long, keeping line
 * and column up to date, then hand over to xmlParserHandlePEReference()
 * if the new current byte is a '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, l bytes long once encoded, to
 * buffer b at index i and move i past it. A single byte is stored
 * directly, anything else goes through xmlCopyCharMultiByte().
 * Wrapped in do { } while (0) so that it behaves as one statement, also
 * in front of an else; the trailing ';' comes from the call site as it
 * already had to.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (xmlChar) (v);			\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));		\
    } while (0)
2026
2027 /**
2028 * xmlSkipBlankChars:
2029 * @ctxt: the XML parser context
2030 *
2031 * skip all blanks character found at that point in the input streams.
2032 * It pops up finished entities in the process if allowable at that point.
2033 *
2034 * Returns the number of space chars skipped
2035 */
2036
2037 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2038 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2039 int res = 0;
2040
2041 /*
2042 * It's Okay to use CUR/NEXT here since all the blanks are on
2043 * the ASCII range.
2044 */
2045 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2046 const xmlChar *cur;
2047 /*
2048 * if we are in the document content, go really fast
2049 */
2050 cur = ctxt->input->cur;
2051 while (IS_BLANK_CH(*cur)) {
2052 if (*cur == '\n') {
2053 ctxt->input->line++; ctxt->input->col = 1;
2054 }
2055 cur++;
2056 res++;
2057 if (*cur == 0) {
2058 ctxt->input->cur = cur;
2059 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060 cur = ctxt->input->cur;
2061 }
2062 }
2063 ctxt->input->cur = cur;
2064 } else {
2065 int cur;
2066 do {
2067 cur = CUR;
2068 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2069 NEXT;
2070 cur = CUR;
2071 res++;
2072 }
2073 while ((cur == 0) && (ctxt->inputNr > 1) &&
2074 (ctxt->instate != XML_PARSER_COMMENT)) {
2075 xmlPopInput(ctxt);
2076 cur = CUR;
2077 }
2078 /*
2079 * Need to handle support of entities branching here
2080 */
2081 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2083 }
2084 return(res);
2085 }
2086
2087 /************************************************************************
2088 * *
2089 * Commodity functions to handle entities *
2090 * *
2091 ************************************************************************/
2092
2093 /**
2094 * xmlPopInput:
2095 * @ctxt: an XML parser context
2096 *
2097 * xmlPopInput: the current input pointed by ctxt->input came to an end
2098 * pop it and return the next char.
2099 *
2100 * Returns the current xmlChar in the parser context
2101 */
2102 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2103 xmlPopInput(xmlParserCtxtPtr ctxt) {
2104 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2105 if (xmlParserDebugEntities)
2106 xmlGenericError(xmlGenericErrorContext,
2107 "Popping input %d\n", ctxt->inputNr);
2108 xmlFreeInputStream(inputPop(ctxt));
2109 if ((*ctxt->input->cur == 0) &&
2110 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111 return(xmlPopInput(ctxt));
2112 return(CUR);
2113 }
2114
2115 /**
2116 * xmlPushInput:
2117 * @ctxt: an XML parser context
2118 * @input: an XML parser input fragment (entity, XML fragment ...).
2119 *
2120 * xmlPushInput: switch to a new input stream which is stacked on top
2121 * of the previous one(s).
2122 * Returns -1 in case of error or the index in the input stack
2123 */
2124 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2125 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2126 int ret;
2127 if (input == NULL) return(-1);
2128
2129 if (xmlParserDebugEntities) {
2130 if ((ctxt->input != NULL) && (ctxt->input->filename))
2131 xmlGenericError(xmlGenericErrorContext,
2132 "%s(%d): ", ctxt->input->filename,
2133 ctxt->input->line);
2134 xmlGenericError(xmlGenericErrorContext,
2135 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2136 }
2137 ret = inputPush(ctxt, input);
2138 GROW;
2139 return(ret);
2140 }
2141
2142 /**
2143 * xmlParseCharRef:
2144 * @ctxt: an XML parser context
2145 *
2146 * parse Reference declarations
2147 *
2148 * [66] CharRef ::= '&#' [0-9]+ ';' |
2149 * '&#x' [0-9a-fA-F]+ ';'
2150 *
2151 * [ WFC: Legal Character ]
2152 * Characters referred to using character references must match the
2153 * production for Char.
2154 *
2155 * Returns the value parsed (as an int), 0 in case of error
2156 */
2157 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2158 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2159 unsigned int val = 0;
2160 int count = 0;
2161 unsigned int outofrange = 0;
2162
2163 /*
2164 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2165 */
2166 if ((RAW == '&') && (NXT(1) == '#') &&
2167 (NXT(2) == 'x')) {
2168 SKIP(3);
2169 GROW;
2170 while (RAW != ';') { /* loop blocked by count */
2171 if (count++ > 20) {
2172 count = 0;
2173 GROW;
2174 }
2175 if ((RAW >= '0') && (RAW <= '9'))
2176 val = val * 16 + (CUR - '0');
2177 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178 val = val * 16 + (CUR - 'a') + 10;
2179 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180 val = val * 16 + (CUR - 'A') + 10;
2181 else {
2182 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2183 val = 0;
2184 break;
2185 }
2186 if (val > 0x10FFFF)
2187 outofrange = val;
2188
2189 NEXT;
2190 count++;
2191 }
2192 if (RAW == ';') {
2193 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2194 ctxt->input->col++;
2195 ctxt->nbChars ++;
2196 ctxt->input->cur++;
2197 }
2198 } else if ((RAW == '&') && (NXT(1) == '#')) {
2199 SKIP(2);
2200 GROW;
2201 while (RAW != ';') { /* loop blocked by count */
2202 if (count++ > 20) {
2203 count = 0;
2204 GROW;
2205 }
2206 if ((RAW >= '0') && (RAW <= '9'))
2207 val = val * 10 + (CUR - '0');
2208 else {
2209 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2210 val = 0;
2211 break;
2212 }
2213 if (val > 0x10FFFF)
2214 outofrange = val;
2215
2216 NEXT;
2217 count++;
2218 }
2219 if (RAW == ';') {
2220 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2221 ctxt->input->col++;
2222 ctxt->nbChars ++;
2223 ctxt->input->cur++;
2224 }
2225 } else {
2226 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2227 }
2228
2229 /*
2230 * [ WFC: Legal Character ]
2231 * Characters referred to using character references must match the
2232 * production for Char.
2233 */
2234 if ((IS_CHAR(val) && (outofrange == 0))) {
2235 return(val);
2236 } else {
2237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238 "xmlParseCharRef: invalid xmlChar value %d\n",
2239 val);
2240 }
2241 return(0);
2242 }
2243
2244 /**
2245 * xmlParseStringCharRef:
2246 * @ctxt: an XML parser context
2247 * @str: a pointer to an index in the string
2248 *
2249 * parse Reference declarations, variant parsing from a string rather
2250 * than an an input flow.
2251 *
2252 * [66] CharRef ::= '&#' [0-9]+ ';' |
2253 * '&#x' [0-9a-fA-F]+ ';'
2254 *
2255 * [ WFC: Legal Character ]
2256 * Characters referred to using character references must match the
2257 * production for Char.
2258 *
2259 * Returns the value parsed (as an int), 0 in case of error, str will be
2260 * updated to the current value of the index
2261 */
2262 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2263 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2264 const xmlChar *ptr;
2265 xmlChar cur;
2266 unsigned int val = 0;
2267 unsigned int outofrange = 0;
2268
2269 if ((str == NULL) || (*str == NULL)) return(0);
2270 ptr = *str;
2271 cur = *ptr;
2272 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2273 ptr += 3;
2274 cur = *ptr;
2275 while (cur != ';') { /* Non input consuming loop */
2276 if ((cur >= '0') && (cur <= '9'))
2277 val = val * 16 + (cur - '0');
2278 else if ((cur >= 'a') && (cur <= 'f'))
2279 val = val * 16 + (cur - 'a') + 10;
2280 else if ((cur >= 'A') && (cur <= 'F'))
2281 val = val * 16 + (cur - 'A') + 10;
2282 else {
2283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2284 val = 0;
2285 break;
2286 }
2287 if (val > 0x10FFFF)
2288 outofrange = val;
2289
2290 ptr++;
2291 cur = *ptr;
2292 }
2293 if (cur == ';')
2294 ptr++;
2295 } else if ((cur == '&') && (ptr[1] == '#')){
2296 ptr += 2;
2297 cur = *ptr;
2298 while (cur != ';') { /* Non input consuming loops */
2299 if ((cur >= '0') && (cur <= '9'))
2300 val = val * 10 + (cur - '0');
2301 else {
2302 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2303 val = 0;
2304 break;
2305 }
2306 if (val > 0x10FFFF)
2307 outofrange = val;
2308
2309 ptr++;
2310 cur = *ptr;
2311 }
2312 if (cur == ';')
2313 ptr++;
2314 } else {
2315 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2316 return(0);
2317 }
2318 *str = ptr;
2319
2320 /*
2321 * [ WFC: Legal Character ]
2322 * Characters referred to using character references must match the
2323 * production for Char.
2324 */
2325 if ((IS_CHAR(val) && (outofrange == 0))) {
2326 return(val);
2327 } else {
2328 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2330 val);
2331 }
2332 return(0);
2333 }
2334
2335 /**
2336 * xmlNewBlanksWrapperInputStream:
2337 * @ctxt: an XML parser context
2338 * @entity: an Entity pointer
2339 *
2340 * Create a new input stream for wrapping
2341 * blanks around a PEReference
2342 *
2343 * Returns the new input stream or NULL
2344 */
2345
deallocblankswrapper(xmlChar * str)2346 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2347
2348 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2349 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350 xmlParserInputPtr input;
2351 xmlChar *buffer;
2352 size_t length;
2353 if (entity == NULL) {
2354 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355 "xmlNewBlanksWrapperInputStream entity\n");
2356 return(NULL);
2357 }
2358 if (xmlParserDebugEntities)
2359 xmlGenericError(xmlGenericErrorContext,
2360 "new blanks wrapper for entity: %s\n", entity->name);
2361 input = xmlNewInputStream(ctxt);
2362 if (input == NULL) {
2363 return(NULL);
2364 }
2365 length = xmlStrlen(entity->name) + 5;
2366 buffer = xmlMallocAtomic(length);
2367 if (buffer == NULL) {
2368 xmlErrMemory(ctxt, NULL);
2369 xmlFree(input);
2370 return(NULL);
2371 }
2372 buffer [0] = ' ';
2373 buffer [1] = '%';
2374 buffer [length-3] = ';';
2375 buffer [length-2] = ' ';
2376 buffer [length-1] = 0;
2377 memcpy(buffer + 2, entity->name, length - 5);
2378 input->free = deallocblankswrapper;
2379 input->base = buffer;
2380 input->cur = buffer;
2381 input->length = length;
2382 input->end = &buffer[length];
2383 return(input);
2384 }
2385
2386 /**
2387 * xmlParserHandlePEReference:
2388 * @ctxt: the parser context
2389 *
2390 * [69] PEReference ::= '%' Name ';'
2391 *
2392 * [ WFC: No Recursion ]
2393 * A parsed entity must not contain a recursive
2394 * reference to itself, either directly or indirectly.
2395 *
2396 * [ WFC: Entity Declared ]
2397 * In a document without any DTD, a document with only an internal DTD
2398 * subset which contains no parameter entity references, or a document
2399 * with "standalone='yes'", ... ... The declaration of a parameter
2400 * entity must precede any reference to it...
2401 *
2402 * [ VC: Entity Declared ]
2403 * In a document with an external subset or external parameter entities
2404 * with "standalone='no'", ... ... The declaration of a parameter entity
2405 * must precede any reference to it...
2406 *
2407 * [ WFC: In DTD ]
2408 * Parameter-entity references may only appear in the DTD.
2409 * NOTE: misleading but this is handled.
2410 *
2411 * A PEReference may have been detected in the current input stream
2412 * the handling is done accordingly to
2413 * http://www.w3.org/TR/REC-xml#entproc
2414 * i.e.
2415 * - Included in literal in entity values
2416 * - Included as Parameter Entity reference within DTDs
2417 */
2418 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2419 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2420 const xmlChar *name;
2421 xmlEntityPtr entity = NULL;
2422 xmlParserInputPtr input;
2423
2424 if (RAW != '%') return;
2425 switch(ctxt->instate) {
2426 case XML_PARSER_CDATA_SECTION:
2427 return;
2428 case XML_PARSER_COMMENT:
2429 return;
2430 case XML_PARSER_START_TAG:
2431 return;
2432 case XML_PARSER_END_TAG:
2433 return;
2434 case XML_PARSER_EOF:
2435 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2436 return;
2437 case XML_PARSER_PROLOG:
2438 case XML_PARSER_START:
2439 case XML_PARSER_MISC:
2440 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2441 return;
2442 case XML_PARSER_ENTITY_DECL:
2443 case XML_PARSER_CONTENT:
2444 case XML_PARSER_ATTRIBUTE_VALUE:
2445 case XML_PARSER_PI:
2446 case XML_PARSER_SYSTEM_LITERAL:
2447 case XML_PARSER_PUBLIC_LITERAL:
2448 /* we just ignore it there */
2449 return;
2450 case XML_PARSER_EPILOG:
2451 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2452 return;
2453 case XML_PARSER_ENTITY_VALUE:
2454 /*
2455 * NOTE: in the case of entity values, we don't do the
2456 * substitution here since we need the literal
2457 * entity value to be able to save the internal
2458 * subset of the document.
2459 * This will be handled by xmlStringDecodeEntities
2460 */
2461 return;
2462 case XML_PARSER_DTD:
2463 /*
2464 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2465 * In the internal DTD subset, parameter-entity references
2466 * can occur only where markup declarations can occur, not
2467 * within markup declarations.
2468 * In that case this is handled in xmlParseMarkupDecl
2469 */
2470 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2471 return;
2472 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2473 return;
2474 break;
2475 case XML_PARSER_IGNORE:
2476 return;
2477 }
2478
2479 NEXT;
2480 name = xmlParseName(ctxt);
2481 if (xmlParserDebugEntities)
2482 xmlGenericError(xmlGenericErrorContext,
2483 "PEReference: %s\n", name);
2484 if (name == NULL) {
2485 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2486 } else {
2487 if (RAW == ';') {
2488 NEXT;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2490 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2491 if (entity == NULL) {
2492
2493 /*
2494 * [ WFC: Entity Declared ]
2495 * In a document without any DTD, a document with only an
2496 * internal DTD subset which contains no parameter entity
2497 * references, or a document with "standalone='yes'", ...
2498 * ... The declaration of a parameter entity must precede
2499 * any reference to it...
2500 */
2501 if ((ctxt->standalone == 1) ||
2502 ((ctxt->hasExternalSubset == 0) &&
2503 (ctxt->hasPErefs == 0))) {
2504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2505 "PEReference: %%%s; not found\n", name);
2506 } else {
2507 /*
2508 * [ VC: Entity Declared ]
2509 * In a document with an external subset or external
2510 * parameter entities with "standalone='no'", ...
2511 * ... The declaration of a parameter entity must precede
2512 * any reference to it...
2513 */
2514 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2515 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2516 "PEReference: %%%s; not found\n",
2517 name, NULL);
2518 } else
2519 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2520 "PEReference: %%%s; not found\n",
2521 name, NULL);
2522 ctxt->valid = 0;
2523 }
2524 } else if (ctxt->input->free != deallocblankswrapper) {
2525 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2526 if (xmlPushInput(ctxt, input) < 0)
2527 return;
2528 } else {
2529 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2530 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2531 xmlChar start[4];
2532 xmlCharEncoding enc;
2533
2534 /*
2535 * handle the extra spaces added before and after
2536 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2537 * this is done independently.
2538 */
2539 input = xmlNewEntityInputStream(ctxt, entity);
2540 if (xmlPushInput(ctxt, input) < 0)
2541 return;
2542
2543 /*
2544 * Get the 4 first bytes and decode the charset
2545 * if enc != XML_CHAR_ENCODING_NONE
2546 * plug some encoding conversion routines.
2547 * Note that, since we may have some non-UTF8
2548 * encoding (like UTF16, bug 135229), the 'length'
2549 * is not known, but we can calculate based upon
2550 * the amount of data in the buffer.
2551 */
2552 GROW
2553 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2554 start[0] = RAW;
2555 start[1] = NXT(1);
2556 start[2] = NXT(2);
2557 start[3] = NXT(3);
2558 enc = xmlDetectCharEncoding(start, 4);
2559 if (enc != XML_CHAR_ENCODING_NONE) {
2560 xmlSwitchEncoding(ctxt, enc);
2561 }
2562 }
2563
2564 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2565 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2566 (IS_BLANK_CH(NXT(5)))) {
2567 xmlParseTextDecl(ctxt);
2568 }
2569 } else {
2570 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2571 "PEReference: %s is not a parameter entity\n",
2572 name);
2573 }
2574 }
2575 } else {
2576 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2577 }
2578 }
2579 }
2580
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer
 * @n:  extra room added on top of the doubling
 *
 * Macro used to grow the current buffer. The companion variable
 * <buffer>_size is set to twice its value plus @n and the buffer is
 * reallocated to that size. On allocation failure control jumps to the
 * label mem_error, which the enclosing function has to provide; the
 * buffer variable still holds the old block at that point.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size = (buffer##_size * 2) + (n);				\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL)							\
        goto mem_error;							\
    buffer = tmp;							\
}
2593
2594 /**
2595 * xmlStringLenDecodeEntities:
2596 * @ctxt: the parser context
2597 * @str: the input string
2598 * @len: the string length
2599 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600 * @end: an end marker xmlChar, 0 if none
2601 * @end2: an end marker xmlChar, 0 if none
2602 * @end3: an end marker xmlChar, 0 if none
2603 *
2604 * Takes a entity string content and process to do the adequate substitutions.
2605 *
2606 * [67] Reference ::= EntityRef | CharRef
2607 *
2608 * [69] PEReference ::= '%' Name ';'
2609 *
2610 * Returns A newly allocated string with the substitution done. The caller
2611 * must deallocate it !
2612 */
2613 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2614 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2616 xmlChar *buffer = NULL;
2617 int buffer_size = 0;
2618
2619 xmlChar *current = NULL;
2620 xmlChar *rep = NULL;
2621 const xmlChar *last;
2622 xmlEntityPtr ent;
2623 int c,l;
2624 int nbchars = 0;
2625
2626 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2627 return(NULL);
2628 last = str + len;
2629
2630 if (((ctxt->depth > 40) &&
2631 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632 (ctxt->depth > 1024)) {
2633 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2634 return(NULL);
2635 }
2636
2637 /*
2638 * allocate a translation buffer.
2639 */
2640 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2641 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2642 if (buffer == NULL) goto mem_error;
2643
2644 /*
2645 * OK loop until we reach one of the ending char or a size limit.
2646 * we are operating on already parsed values.
2647 */
2648 if (str < last)
2649 c = CUR_SCHAR(str, l);
2650 else
2651 c = 0;
2652 while ((c != 0) && (c != end) && /* non input consuming loop */
2653 (c != end2) && (c != end3)) {
2654
2655 if (c == 0) break;
2656 if ((c == '&') && (str[1] == '#')) {
2657 int val = xmlParseStringCharRef(ctxt, &str);
2658 if (val != 0) {
2659 COPY_BUF(0,buffer,nbchars,val);
2660 }
2661 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2662 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2663 }
2664 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665 if (xmlParserDebugEntities)
2666 xmlGenericError(xmlGenericErrorContext,
2667 "String decoding Entity Reference: %.30s\n",
2668 str);
2669 ent = xmlParseStringEntityRef(ctxt, &str);
2670 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2672 goto int_error;
2673 if (ent != NULL)
2674 ctxt->nbentities += ent->checked;
2675 if ((ent != NULL) &&
2676 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677 if (ent->content != NULL) {
2678 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2679 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681 }
2682 } else {
2683 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684 "predefined entity has no content\n");
2685 }
2686 } else if ((ent != NULL) && (ent->content != NULL)) {
2687 ctxt->depth++;
2688 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2689 0, 0, 0);
2690 ctxt->depth--;
2691
2692 if (rep != NULL) {
2693 current = rep;
2694 while (*current != 0) { /* non input consuming loop */
2695 buffer[nbchars++] = *current++;
2696 if (nbchars >
2697 buffer_size - XML_PARSER_BUFFER_SIZE) {
2698 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2699 goto int_error;
2700 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2701 }
2702 }
2703 xmlFree(rep);
2704 rep = NULL;
2705 }
2706 } else if (ent != NULL) {
2707 int i = xmlStrlen(ent->name);
2708 const xmlChar *cur = ent->name;
2709
2710 buffer[nbchars++] = '&';
2711 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2712 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713 }
2714 for (;i > 0;i--)
2715 buffer[nbchars++] = *cur++;
2716 buffer[nbchars++] = ';';
2717 }
2718 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719 if (xmlParserDebugEntities)
2720 xmlGenericError(xmlGenericErrorContext,
2721 "String decoding PE Reference: %.30s\n", str);
2722 ent = xmlParseStringPEReference(ctxt, &str);
2723 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2724 goto int_error;
2725 if (ent != NULL)
2726 ctxt->nbentities += ent->checked;
2727 if (ent != NULL) {
2728 if (ent->content == NULL) {
2729 xmlLoadEntityContent(ctxt, ent);
2730 }
2731 ctxt->depth++;
2732 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733 0, 0, 0);
2734 ctxt->depth--;
2735 if (rep != NULL) {
2736 current = rep;
2737 while (*current != 0) { /* non input consuming loop */
2738 buffer[nbchars++] = *current++;
2739 if (nbchars >
2740 buffer_size - XML_PARSER_BUFFER_SIZE) {
2741 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2742 goto int_error;
2743 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2744 }
2745 }
2746 xmlFree(rep);
2747 rep = NULL;
2748 }
2749 }
2750 } else {
2751 COPY_BUF(l,buffer,nbchars,c);
2752 str += l;
2753 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2754 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2755 }
2756 }
2757 if (str < last)
2758 c = CUR_SCHAR(str, l);
2759 else
2760 c = 0;
2761 }
2762 buffer[nbchars] = 0;
2763 return(buffer);
2764
2765 mem_error:
2766 xmlErrMemory(ctxt, NULL);
2767 int_error:
2768 if (rep != NULL)
2769 xmlFree(rep);
2770 if (buffer != NULL)
2771 xmlFree(buffer);
2772 return(NULL);
2773 }
2774
2775 /**
2776 * xmlStringDecodeEntities:
2777 * @ctxt: the parser context
2778 * @str: the input string
2779 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780 * @end: an end marker xmlChar, 0 if none
2781 * @end2: an end marker xmlChar, 0 if none
2782 * @end3: an end marker xmlChar, 0 if none
2783 *
2784 * Takes a entity string content and process to do the adequate substitutions.
2785 *
2786 * [67] Reference ::= EntityRef | CharRef
2787 *
2788 * [69] PEReference ::= '%' Name ';'
2789 *
2790 * Returns A newly allocated string with the substitution done. The caller
2791 * must deallocate it !
2792 */
2793 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2794 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795 xmlChar end, xmlChar end2, xmlChar end3) {
2796 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2797 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2798 end, end2, end3));
2799 }
2800
2801 /************************************************************************
2802 * *
2803 * Commodity functions, cleanup needed ? *
2804 * *
2805 ************************************************************************/
2806
2807 /**
2808 * areBlanks:
2809 * @ctxt: an XML parser context
2810 * @str: a xmlChar *
2811 * @len: the size of @str
2812 * @blank_chars: we know the chars are blanks
2813 *
2814 * Is this a sequence of blank chars that one can ignore ?
2815 *
2816 * Returns 1 if ignorable 0 otherwise.
2817 */
2818
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2819 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2820 int blank_chars) {
2821 int i, ret;
2822 xmlNodePtr lastChild;
2823
2824 /*
2825 * Don't spend time trying to differentiate them, the same callback is
2826 * used !
2827 */
2828 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2829 return(0);
2830
2831 /*
2832 * Check for xml:space value.
2833 */
2834 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835 (*(ctxt->space) == -2))
2836 return(0);
2837
2838 /*
2839 * Check that the string is made of blanks
2840 */
2841 if (blank_chars == 0) {
2842 for (i = 0;i < len;i++)
2843 if (!(IS_BLANK_CH(str[i]))) return(0);
2844 }
2845
2846 /*
2847 * Look if the element is mixed content in the DTD if available
2848 */
2849 if (ctxt->node == NULL) return(0);
2850 if (ctxt->myDoc != NULL) {
2851 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852 if (ret == 0) return(1);
2853 if (ret == 1) return(0);
2854 }
2855
2856 /*
2857 * Otherwise, heuristic :-\
2858 */
2859 if ((RAW != '<') && (RAW != 0xD)) return(0);
2860 if ((ctxt->node->children == NULL) &&
2861 (RAW == '<') && (NXT(1) == '/')) return(0);
2862
2863 lastChild = xmlGetLastChild(ctxt->node);
2864 if (lastChild == NULL) {
2865 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866 (ctxt->node->content != NULL)) return(0);
2867 } else if (xmlNodeIsText(lastChild))
2868 return(0);
2869 else if ((ctxt->node->children != NULL) &&
2870 (xmlNodeIsText(ctxt->node->children)))
2871 return(0);
2872 return(1);
2873 }
2874
2875 /************************************************************************
2876 * *
2877 * Extra stuff for namespace support *
2878 * Relates to http://www.w3.org/TR/WD-xml-names *
2879 * *
2880 ************************************************************************/
2881
2882 /**
2883 * xmlSplitQName:
2884 * @ctxt: an XML parser context
2885 * @name: an XML parser context
2886 * @prefix: a xmlChar **
2887 *
2888 * parse an UTF8 encoded XML qualified name string
2889 *
2890 * [NS 5] QName ::= (Prefix ':')? LocalPart
2891 *
2892 * [NS 6] Prefix ::= NCName
2893 *
2894 * [NS 7] LocalPart ::= NCName
2895 *
2896 * Returns the local part, and prefix is updated
2897 * to get the Prefix if any.
2898 */
2899
2900 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2901 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2902 xmlChar buf[XML_MAX_NAMELEN + 5];
2903 xmlChar *buffer = NULL;
2904 int len = 0;
2905 int max = XML_MAX_NAMELEN;
2906 xmlChar *ret = NULL;
2907 const xmlChar *cur = name;
2908 int c;
2909
2910 if (prefix == NULL) return(NULL);
2911 *prefix = NULL;
2912
2913 if (cur == NULL) return(NULL);
2914
2915 #ifndef XML_XML_NAMESPACE
2916 /* xml: prefix is not really a namespace */
2917 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2918 (cur[2] == 'l') && (cur[3] == ':'))
2919 return(xmlStrdup(name));
2920 #endif
2921
2922 /* nasty but well=formed */
2923 if (cur[0] == ':')
2924 return(xmlStrdup(name));
2925
2926 c = *cur++;
2927 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2928 buf[len++] = c;
2929 c = *cur++;
2930 }
2931 if (len >= max) {
2932 /*
2933 * Okay someone managed to make a huge name, so he's ready to pay
2934 * for the processing speed.
2935 */
2936 max = len * 2;
2937
2938 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2939 if (buffer == NULL) {
2940 xmlErrMemory(ctxt, NULL);
2941 return(NULL);
2942 }
2943 memcpy(buffer, buf, len);
2944 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2945 if (len + 10 > max) {
2946 xmlChar *tmp;
2947
2948 max *= 2;
2949 tmp = (xmlChar *) xmlRealloc(buffer,
2950 max * sizeof(xmlChar));
2951 if (tmp == NULL) {
2952 xmlFree(buffer);
2953 xmlErrMemory(ctxt, NULL);
2954 return(NULL);
2955 }
2956 buffer = tmp;
2957 }
2958 buffer[len++] = c;
2959 c = *cur++;
2960 }
2961 buffer[len] = 0;
2962 }
2963
2964 if ((c == ':') && (*cur == 0)) {
2965 if (buffer != NULL)
2966 xmlFree(buffer);
2967 *prefix = NULL;
2968 return(xmlStrdup(name));
2969 }
2970
2971 if (buffer == NULL)
2972 ret = xmlStrndup(buf, len);
2973 else {
2974 ret = buffer;
2975 buffer = NULL;
2976 max = XML_MAX_NAMELEN;
2977 }
2978
2979
2980 if (c == ':') {
2981 c = *cur;
2982 *prefix = ret;
2983 if (c == 0) {
2984 return(xmlStrndup(BAD_CAST "", 0));
2985 }
2986 len = 0;
2987
2988 /*
2989 * Check that the first character is proper to start
2990 * a new name
2991 */
2992 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2993 ((c >= 0x41) && (c <= 0x5A)) ||
2994 (c == '_') || (c == ':'))) {
2995 int l;
2996 int first = CUR_SCHAR(cur, l);
2997
2998 if (!IS_LETTER(first) && (first != '_')) {
2999 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3000 "Name %s is not XML Namespace compliant\n",
3001 name);
3002 }
3003 }
3004 cur++;
3005
3006 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3007 buf[len++] = c;
3008 c = *cur++;
3009 }
3010 if (len >= max) {
3011 /*
3012 * Okay someone managed to make a huge name, so he's ready to pay
3013 * for the processing speed.
3014 */
3015 max = len * 2;
3016
3017 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3018 if (buffer == NULL) {
3019 xmlErrMemory(ctxt, NULL);
3020 return(NULL);
3021 }
3022 memcpy(buffer, buf, len);
3023 while (c != 0) { /* tested bigname2.xml */
3024 if (len + 10 > max) {
3025 xmlChar *tmp;
3026
3027 max *= 2;
3028 tmp = (xmlChar *) xmlRealloc(buffer,
3029 max * sizeof(xmlChar));
3030 if (tmp == NULL) {
3031 xmlErrMemory(ctxt, NULL);
3032 xmlFree(buffer);
3033 return(NULL);
3034 }
3035 buffer = tmp;
3036 }
3037 buffer[len++] = c;
3038 c = *cur++;
3039 }
3040 buffer[len] = 0;
3041 }
3042
3043 if (buffer == NULL)
3044 ret = xmlStrndup(buf, len);
3045 else {
3046 ret = buffer;
3047 }
3048 }
3049
3050 return(ret);
3051 }
3052
3053 /************************************************************************
3054 * *
3055 * The parser itself *
3056 * Relates to http://www.w3.org/TR/REC-xml *
3057 * *
3058 ************************************************************************/
3059
3060 /************************************************************************
3061 * *
3062 * Routines to parse Name, NCName and NmToken *
3063 * *
3064 ************************************************************************/
3065 #ifdef DEBUG
3066 static unsigned long nbParseName = 0;
3067 static unsigned long nbParseNmToken = 0;
3068 static unsigned long nbParseNCName = 0;
3069 static unsigned long nbParseNCNameComplex = 0;
3070 static unsigned long nbParseNameComplex = 0;
3071 static unsigned long nbParseStringName = 0;
3072 #endif
3073
3074 /*
3075 * The two following functions are related to the change of accepted
3076 * characters for Name and NmToken in the Revision 5 of XML-1.0
3077 * They correspond to the modified production [4] and the new production [4a]
3078 * changes in that revision. Also note that the macros used for the
3079 * productions Letter, Digit, CombiningChar and Extender are not needed
3080 * anymore.
3081 * We still keep compatibility to pre-revision5 parsing semantic if the
3082 * new XML_PARSE_OLD10 option is given to the parser.
3083 */
3084 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3085 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087 /*
3088 * Use the new checks of production [4] [4a] amd [5] of the
3089 * Update 5 of XML-1.0
3090 */
3091 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092 (((c >= 'a') && (c <= 'z')) ||
3093 ((c >= 'A') && (c <= 'Z')) ||
3094 (c == '_') || (c == ':') ||
3095 ((c >= 0xC0) && (c <= 0xD6)) ||
3096 ((c >= 0xD8) && (c <= 0xF6)) ||
3097 ((c >= 0xF8) && (c <= 0x2FF)) ||
3098 ((c >= 0x370) && (c <= 0x37D)) ||
3099 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 ((c >= 0x200C) && (c <= 0x200D)) ||
3101 ((c >= 0x2070) && (c <= 0x218F)) ||
3102 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106 ((c >= 0x10000) && (c <= 0xEFFFF))))
3107 return(1);
3108 } else {
3109 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3110 return(1);
3111 }
3112 return(0);
3113 }
3114
3115 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3116 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118 /*
3119 * Use the new checks of production [4] [4a] amd [5] of the
3120 * Update 5 of XML-1.0
3121 */
3122 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123 (((c >= 'a') && (c <= 'z')) ||
3124 ((c >= 'A') && (c <= 'Z')) ||
3125 ((c >= '0') && (c <= '9')) || /* !start */
3126 (c == '_') || (c == ':') ||
3127 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128 ((c >= 0xC0) && (c <= 0xD6)) ||
3129 ((c >= 0xD8) && (c <= 0xF6)) ||
3130 ((c >= 0xF8) && (c <= 0x2FF)) ||
3131 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132 ((c >= 0x370) && (c <= 0x37D)) ||
3133 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134 ((c >= 0x200C) && (c <= 0x200D)) ||
3135 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136 ((c >= 0x2070) && (c <= 0x218F)) ||
3137 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141 ((c >= 0x10000) && (c <= 0xEFFFF))))
3142 return(1);
3143 } else {
3144 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145 (c == '.') || (c == '-') ||
3146 (c == '_') || (c == ':') ||
3147 (IS_COMBINING(c)) ||
3148 (IS_EXTENDER(c)))
3149 return(1);
3150 }
3151 return(0);
3152 }
3153
3154 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3155 int *len, int *alloc, int normalize);
3156
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Character-by-character fallback used by xmlParseName() when its ASCII
 * accelerator cannot settle the name. Characters are fetched with
 * CUR_CHAR() and consumed with NEXTL().
 *
 * When XML_PARSE_OLD10 is not set in ctxt->options the explicit code point
 * ranges below are used; otherwise the IS_LETTER / IS_DIGIT / IS_COMBINING /
 * IS_EXTENDER classes are used.
 *
 * Returns the result of xmlDictLookup() on the bytes consumed, or NULL if
 * the current character cannot start a name.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed; l: size set by CUR_CHAR() */
    int c;              /* current character */
    int count = 0;      /* characters consumed since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        /* The first character must be in the name-start set. */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        /*
         * Following characters: the name-start set plus the entries
         * tagged "!start" (digits, '-', '.', 0xB7, 0x300-0x36F,
         * 0x203F-0x2040).
         */
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* ask for more input every hundred characters or so */
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* XML_PARSE_OLD10: original XML 1.0 character classes */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /*
     * The name is the len bytes that end at the cursor, except when the
     * cursor rests on the '\n' of a "\r\n" pair: the name then ends one
     * byte earlier.
     * NOTE(review): presumably CUR_CHAR() steps over the '\r' while
     * normalising the line end - confirm against xmlCurrentChar().
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
3256
3257 /**
3258 * xmlParseName:
3259 * @ctxt: an XML parser context
3260 *
3261 * parse an XML name.
3262 *
3263 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264 * CombiningChar | Extender
3265 *
3266 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3267 *
3268 * [6] Names ::= Name (#x20 Name)*
3269 *
3270 * Returns the Name parsed or NULL
3271 */
3272
3273 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3274 xmlParseName(xmlParserCtxtPtr ctxt) {
3275 const xmlChar *in;
3276 const xmlChar *ret;
3277 int count = 0;
3278
3279 GROW;
3280
3281 #ifdef DEBUG
3282 nbParseName++;
3283 #endif
3284
3285 /*
3286 * Accelerator for simple ASCII names
3287 */
3288 in = ctxt->input->cur;
3289 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290 ((*in >= 0x41) && (*in <= 0x5A)) ||
3291 (*in == '_') || (*in == ':')) {
3292 in++;
3293 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294 ((*in >= 0x41) && (*in <= 0x5A)) ||
3295 ((*in >= 0x30) && (*in <= 0x39)) ||
3296 (*in == '_') || (*in == '-') ||
3297 (*in == ':') || (*in == '.'))
3298 in++;
3299 if ((*in > 0) && (*in < 0x80)) {
3300 count = in - ctxt->input->cur;
3301 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3302 ctxt->input->cur = in;
3303 ctxt->nbChars += count;
3304 ctxt->input->col += count;
3305 if (ret == NULL)
3306 xmlErrMemory(ctxt, NULL);
3307 return(ret);
3308 }
3309 }
3310 /* accelerator for special cases */
3311 return(xmlParseNameComplex(ctxt));
3312 }
3313
3314 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3315 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3316 int len = 0, l;
3317 int c;
3318 int count = 0;
3319
3320 #ifdef DEBUG
3321 nbParseNCNameComplex++;
3322 #endif
3323
3324 /*
3325 * Handler for more complex cases
3326 */
3327 GROW;
3328 c = CUR_CHAR(l);
3329 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3331 return(NULL);
3332 }
3333
3334 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336 if (count++ > 100) {
3337 count = 0;
3338 GROW;
3339 }
3340 len += l;
3341 NEXTL(l);
3342 c = CUR_CHAR(l);
3343 }
3344 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3345 }
3346
3347 /**
3348 * xmlParseNCName:
3349 * @ctxt: an XML parser context
3350 * @len: lenght of the string parsed
3351 *
3352 * parse an XML name.
3353 *
3354 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355 * CombiningChar | Extender
3356 *
3357 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3358 *
3359 * Returns the Name parsed or NULL
3360 */
3361
3362 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3363 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3364 const xmlChar *in;
3365 const xmlChar *ret;
3366 int count = 0;
3367
3368 #ifdef DEBUG
3369 nbParseNCName++;
3370 #endif
3371
3372 /*
3373 * Accelerator for simple ASCII names
3374 */
3375 in = ctxt->input->cur;
3376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 (*in == '_')) {
3379 in++;
3380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381 ((*in >= 0x41) && (*in <= 0x5A)) ||
3382 ((*in >= 0x30) && (*in <= 0x39)) ||
3383 (*in == '_') || (*in == '-') ||
3384 (*in == '.'))
3385 in++;
3386 if ((*in > 0) && (*in < 0x80)) {
3387 count = in - ctxt->input->cur;
3388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 ctxt->input->cur = in;
3390 ctxt->nbChars += count;
3391 ctxt->input->col += count;
3392 if (ret == NULL) {
3393 xmlErrMemory(ctxt, NULL);
3394 }
3395 return(ret);
3396 }
3397 }
3398 return(xmlParseNCNameComplex(ctxt));
3399 }
3400
3401 /**
3402 * xmlParseNameAndCompare:
3403 * @ctxt: an XML parser context
3404 *
3405 * parse an XML name and compares for match
3406 * (specialized for endtag parsing)
3407 *
3408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409 * and the name for mismatch
3410 */
3411
3412 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3413 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3414 register const xmlChar *cmp = other;
3415 register const xmlChar *in;
3416 const xmlChar *ret;
3417
3418 GROW;
3419
3420 in = ctxt->input->cur;
3421 while (*in != 0 && *in == *cmp) {
3422 ++in;
3423 ++cmp;
3424 ctxt->input->col++;
3425 }
3426 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3427 /* success */
3428 ctxt->input->cur = in;
3429 return (const xmlChar*) 1;
3430 }
3431 /* failure (or end of input buffer), check with full function */
3432 ret = xmlParseName (ctxt);
3433 /* strings coming from the dictionnary direct compare possible */
3434 if (ret == other) {
3435 return (const xmlChar*) 1;
3436 }
3437 return ret;
3438 }
3439
3440 /**
3441 * xmlParseStringName:
3442 * @ctxt: an XML parser context
3443 * @str: a pointer to the string pointer (IN/OUT)
3444 *
3445 * parse an XML name.
3446 *
3447 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448 * CombiningChar | Extender
3449 *
3450 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3451 *
3452 * [6] Names ::= Name (#x20 Name)*
3453 *
3454 * Returns the Name parsed or NULL. The @str pointer
3455 * is updated to the current location in the string.
3456 */
3457
3458 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3459 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460 xmlChar buf[XML_MAX_NAMELEN + 5];
3461 const xmlChar *cur = *str;
3462 int len = 0, l;
3463 int c;
3464
3465 #ifdef DEBUG
3466 nbParseStringName++;
3467 #endif
3468
3469 c = CUR_SCHAR(cur, l);
3470 if (!xmlIsNameStartChar(ctxt, c)) {
3471 return(NULL);
3472 }
3473
3474 COPY_BUF(l,buf,len,c);
3475 cur += l;
3476 c = CUR_SCHAR(cur, l);
3477 while (xmlIsNameChar(ctxt, c)) {
3478 COPY_BUF(l,buf,len,c);
3479 cur += l;
3480 c = CUR_SCHAR(cur, l);
3481 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3482 /*
3483 * Okay someone managed to make a huge name, so he's ready to pay
3484 * for the processing speed.
3485 */
3486 xmlChar *buffer;
3487 int max = len * 2;
3488
3489 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3490 if (buffer == NULL) {
3491 xmlErrMemory(ctxt, NULL);
3492 return(NULL);
3493 }
3494 memcpy(buffer, buf, len);
3495 while (xmlIsNameChar(ctxt, c)) {
3496 if (len + 10 > max) {
3497 xmlChar *tmp;
3498 max *= 2;
3499 tmp = (xmlChar *) xmlRealloc(buffer,
3500 max * sizeof(xmlChar));
3501 if (tmp == NULL) {
3502 xmlErrMemory(ctxt, NULL);
3503 xmlFree(buffer);
3504 return(NULL);
3505 }
3506 buffer = tmp;
3507 }
3508 COPY_BUF(l,buffer,len,c);
3509 cur += l;
3510 c = CUR_SCHAR(cur, l);
3511 }
3512 buffer[len] = 0;
3513 *str = cur;
3514 return(buffer);
3515 }
3516 }
3517 *str = cur;
3518 return(xmlStrndup(buf, len));
3519 }
3520
3521 /**
3522 * xmlParseNmtoken:
3523 * @ctxt: an XML parser context
3524 *
3525 * parse an XML Nmtoken.
3526 *
3527 * [7] Nmtoken ::= (NameChar)+
3528 *
3529 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3530 *
3531 * Returns the Nmtoken parsed or NULL
3532 */
3533
3534 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3535 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536 xmlChar buf[XML_MAX_NAMELEN + 5];
3537 int len = 0, l;
3538 int c;
3539 int count = 0;
3540
3541 #ifdef DEBUG
3542 nbParseNmToken++;
3543 #endif
3544
3545 GROW;
3546 c = CUR_CHAR(l);
3547
3548 while (xmlIsNameChar(ctxt, c)) {
3549 if (count++ > 100) {
3550 count = 0;
3551 GROW;
3552 }
3553 COPY_BUF(l,buf,len,c);
3554 NEXTL(l);
3555 c = CUR_CHAR(l);
3556 if (len >= XML_MAX_NAMELEN) {
3557 /*
3558 * Okay someone managed to make a huge token, so he's ready to pay
3559 * for the processing speed.
3560 */
3561 xmlChar *buffer;
3562 int max = len * 2;
3563
3564 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3565 if (buffer == NULL) {
3566 xmlErrMemory(ctxt, NULL);
3567 return(NULL);
3568 }
3569 memcpy(buffer, buf, len);
3570 while (xmlIsNameChar(ctxt, c)) {
3571 if (count++ > 100) {
3572 count = 0;
3573 GROW;
3574 }
3575 if (len + 10 > max) {
3576 xmlChar *tmp;
3577
3578 max *= 2;
3579 tmp = (xmlChar *) xmlRealloc(buffer,
3580 max * sizeof(xmlChar));
3581 if (tmp == NULL) {
3582 xmlErrMemory(ctxt, NULL);
3583 xmlFree(buffer);
3584 return(NULL);
3585 }
3586 buffer = tmp;
3587 }
3588 COPY_BUF(l,buffer,len,c);
3589 NEXTL(l);
3590 c = CUR_CHAR(l);
3591 }
3592 buffer[len] = 0;
3593 return(buffer);
3594 }
3595 }
3596 if (len == 0)
3597 return(NULL);
3598 return(xmlStrndup(buf, len));
3599 }
3600
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the quoted literal is copied into a
 * heap buffer, that copy is scanned for '%' and '&' that do not introduce
 * a well formed reference, and the copy is finally passed to
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF.
 *
 * When the closing quote is found and @orig is non-NULL, *orig receives
 * the buffer holding the raw copy (it is not freed here); in every other
 * case that buffer is freed before returning.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which character closes the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember which input the literal started in, see the NOTE below */
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* keep room for one more character plus the final 0 */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* a 0 may only mean the buffer ran dry: refill and read again */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;         /* which of '%' / '&' was seen */

            cur++;
            /* the marker must be followed by a Name and then ';' */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            /*
             * '%' seen while ctxt->inSubset == 1 and only one input is
             * on the stack is reported as XML_ERR_ENTITY_PE_INTERNAL
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* do not step past the end of the copy */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the copy loop ended on something else than the closing quote */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3740
3741 /**
3742 * xmlParseAttValueComplex:
3743 * @ctxt: an XML parser context
3744 * @len: the resulting attribute len
3745 * @normalize: wether to apply the inner normalization
3746 *
3747 * parse a value for an attribute, this is the fallback function
3748 * of xmlParseAttValue() when the attribute parsing requires handling
3749 * of non-ASCII characters, or normalization compaction.
3750 *
3751 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3752 */
3753 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3754 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3755 xmlChar limit = 0;
3756 xmlChar *buf = NULL;
3757 xmlChar *rep = NULL;
3758 int len = 0;
3759 int buf_size = 0;
3760 int c, l, in_space = 0;
3761 xmlChar *current = NULL;
3762 xmlEntityPtr ent;
3763
3764 if (NXT(0) == '"') {
3765 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3766 limit = '"';
3767 NEXT;
3768 } else if (NXT(0) == '\'') {
3769 limit = '\'';
3770 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3771 NEXT;
3772 } else {
3773 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3774 return(NULL);
3775 }
3776
3777 /*
3778 * allocate a translation buffer.
3779 */
3780 buf_size = XML_PARSER_BUFFER_SIZE;
3781 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3782 if (buf == NULL) goto mem_error;
3783
3784 /*
3785 * OK loop until we reach one of the ending char or a size limit.
3786 */
3787 c = CUR_CHAR(l);
3788 while ((NXT(0) != limit) && /* checked */
3789 (IS_CHAR(c)) && (c != '<')) {
3790 if (c == 0) break;
3791 if (c == '&') {
3792 in_space = 0;
3793 if (NXT(1) == '#') {
3794 int val = xmlParseCharRef(ctxt);
3795
3796 if (val == '&') {
3797 if (ctxt->replaceEntities) {
3798 if (len > buf_size - 10) {
3799 growBuffer(buf, 10);
3800 }
3801 buf[len++] = '&';
3802 } else {
3803 /*
3804 * The reparsing will be done in xmlStringGetNodeList()
3805 * called by the attribute() function in SAX.c
3806 */
3807 if (len > buf_size - 10) {
3808 growBuffer(buf, 10);
3809 }
3810 buf[len++] = '&';
3811 buf[len++] = '#';
3812 buf[len++] = '3';
3813 buf[len++] = '8';
3814 buf[len++] = ';';
3815 }
3816 } else if (val != 0) {
3817 if (len > buf_size - 10) {
3818 growBuffer(buf, 10);
3819 }
3820 len += xmlCopyChar(0, &buf[len], val);
3821 }
3822 } else {
3823 ent = xmlParseEntityRef(ctxt);
3824 ctxt->nbentities++;
3825 if (ent != NULL)
3826 ctxt->nbentities += ent->owner;
3827 if ((ent != NULL) &&
3828 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829 if (len > buf_size - 10) {
3830 growBuffer(buf, 10);
3831 }
3832 if ((ctxt->replaceEntities == 0) &&
3833 (ent->content[0] == '&')) {
3834 buf[len++] = '&';
3835 buf[len++] = '#';
3836 buf[len++] = '3';
3837 buf[len++] = '8';
3838 buf[len++] = ';';
3839 } else {
3840 buf[len++] = ent->content[0];
3841 }
3842 } else if ((ent != NULL) &&
3843 (ctxt->replaceEntities != 0)) {
3844 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845 rep = xmlStringDecodeEntities(ctxt, ent->content,
3846 XML_SUBSTITUTE_REF,
3847 0, 0, 0);
3848 if (rep != NULL) {
3849 current = rep;
3850 while (*current != 0) { /* non input consuming */
3851 if ((*current == 0xD) || (*current == 0xA) ||
3852 (*current == 0x9)) {
3853 buf[len++] = 0x20;
3854 current++;
3855 } else
3856 buf[len++] = *current++;
3857 if (len > buf_size - 10) {
3858 growBuffer(buf, 10);
3859 }
3860 }
3861 xmlFree(rep);
3862 rep = NULL;
3863 }
3864 } else {
3865 if (len > buf_size - 10) {
3866 growBuffer(buf, 10);
3867 }
3868 if (ent->content != NULL)
3869 buf[len++] = ent->content[0];
3870 }
3871 } else if (ent != NULL) {
3872 int i = xmlStrlen(ent->name);
3873 const xmlChar *cur = ent->name;
3874
3875 /*
3876 * This may look absurd but is needed to detect
3877 * entities problems
3878 */
3879 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880 (ent->content != NULL)) {
3881 rep = xmlStringDecodeEntities(ctxt, ent->content,
3882 XML_SUBSTITUTE_REF, 0, 0, 0);
3883 if (rep != NULL) {
3884 xmlFree(rep);
3885 rep = NULL;
3886 }
3887 }
3888
3889 /*
3890 * Just output the reference
3891 */
3892 buf[len++] = '&';
3893 while (len > buf_size - i - 10) {
3894 growBuffer(buf, i + 10);
3895 }
3896 for (;i > 0;i--)
3897 buf[len++] = *cur++;
3898 buf[len++] = ';';
3899 }
3900 }
3901 } else {
3902 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903 if ((len != 0) || (!normalize)) {
3904 if ((!normalize) || (!in_space)) {
3905 COPY_BUF(l,buf,len,0x20);
3906 while (len > buf_size - 10) {
3907 growBuffer(buf, 10);
3908 }
3909 }
3910 in_space = 1;
3911 }
3912 } else {
3913 in_space = 0;
3914 COPY_BUF(l,buf,len,c);
3915 if (len > buf_size - 10) {
3916 growBuffer(buf, 10);
3917 }
3918 }
3919 NEXTL(l);
3920 }
3921 GROW;
3922 c = CUR_CHAR(l);
3923 }
3924 if ((in_space) && (normalize)) {
3925 while (buf[len - 1] == 0x20) len--;
3926 }
3927 buf[len] = 0;
3928 if (RAW == '<') {
3929 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3930 } else if (RAW != limit) {
3931 if ((c != 0) && (!IS_CHAR(c))) {
3932 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933 "invalid character in attribute value\n");
3934 } else {
3935 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936 "AttValue: ' expected\n");
3937 }
3938 } else
3939 NEXT;
3940 if (attlen != NULL) *attlen = len;
3941 return(buf);
3942
3943 mem_error:
3944 xmlErrMemory(ctxt, NULL);
3945 if (buf != NULL)
3946 xmlFree(buf);
3947 if (rep != NULL)
3948 xmlFree(rep);
3949 return(NULL);
3950 }
3951
3952 /**
3953 * xmlParseAttValue:
3954 * @ctxt: an XML parser context
3955 *
3956 * parse a value for an attribute
3957 * Note: the parser won't do substitution of entities here, this
3958 * will be handled later in xmlStringGetNodeList
3959 *
3960 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961 * "'" ([^<&'] | Reference)* "'"
3962 *
3963 * 3.3.3 Attribute-Value Normalization:
3964 * Before the value of an attribute is passed to the application or
3965 * checked for validity, the XML processor must normalize it as follows:
3966 * - a character reference is processed by appending the referenced
3967 * character to the attribute value
3968 * - an entity reference is processed by recursively processing the
3969 * replacement text of the entity
3970 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971 * appending #x20 to the normalized value, except that only a single
3972 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3973 * parsed entity or the literal entity value of an internal parsed entity
3974 * - other characters are processed by appending them to the normalized value
3975 * If the declared value is not CDATA, then the XML processor must further
3976 * process the normalized attribute value by discarding any leading and
3977 * trailing space (#x20) characters, and by replacing sequences of space
3978 * (#x20) characters by a single space (#x20) character.
3979 * All attributes for which no declaration has been read should be treated
3980 * by a non-validating parser as if declared CDATA.
3981 *
3982 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3983 */
3984
3985
3986 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3987 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3988 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3989 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3990 }
3991
3992 /**
3993 * xmlParseSystemLiteral:
3994 * @ctxt: an XML parser context
3995 *
3996 * parse an XML Literal
3997 *
3998 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3999 *
4000 * Returns the SystemLiteral parsed or NULL
4001 */
4002
4003 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4004 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005 xmlChar *buf = NULL;
4006 int len = 0;
4007 int size = XML_PARSER_BUFFER_SIZE;
4008 int cur, l;
4009 xmlChar stop;
4010 int state = ctxt->instate;
4011 int count = 0;
4012
4013 SHRINK;
4014 if (RAW == '"') {
4015 NEXT;
4016 stop = '"';
4017 } else if (RAW == '\'') {
4018 NEXT;
4019 stop = '\'';
4020 } else {
4021 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4022 return(NULL);
4023 }
4024
4025 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4026 if (buf == NULL) {
4027 xmlErrMemory(ctxt, NULL);
4028 return(NULL);
4029 }
4030 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4031 cur = CUR_CHAR(l);
4032 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4033 if (len + 5 >= size) {
4034 xmlChar *tmp;
4035
4036 size *= 2;
4037 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4038 if (tmp == NULL) {
4039 xmlFree(buf);
4040 xmlErrMemory(ctxt, NULL);
4041 ctxt->instate = (xmlParserInputState) state;
4042 return(NULL);
4043 }
4044 buf = tmp;
4045 }
4046 count++;
4047 if (count > 50) {
4048 GROW;
4049 count = 0;
4050 }
4051 COPY_BUF(l,buf,len,cur);
4052 NEXTL(l);
4053 cur = CUR_CHAR(l);
4054 if (cur == 0) {
4055 GROW;
4056 SHRINK;
4057 cur = CUR_CHAR(l);
4058 }
4059 }
4060 buf[len] = 0;
4061 ctxt->instate = (xmlParserInputState) state;
4062 if (!IS_CHAR(cur)) {
4063 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064 } else {
4065 NEXT;
4066 }
4067 return(buf);
4068 }
4069
4070 /**
4071 * xmlParsePubidLiteral:
4072 * @ctxt: an XML parser context
4073 *
4074 * parse an XML public literal
4075 *
4076 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4077 *
4078 * Returns the PubidLiteral parsed or NULL.
4079 */
4080
4081 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4082 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083 xmlChar *buf = NULL;
4084 int len = 0;
4085 int size = XML_PARSER_BUFFER_SIZE;
4086 xmlChar cur;
4087 xmlChar stop;
4088 int count = 0;
4089 xmlParserInputState oldstate = ctxt->instate;
4090
4091 SHRINK;
4092 if (RAW == '"') {
4093 NEXT;
4094 stop = '"';
4095 } else if (RAW == '\'') {
4096 NEXT;
4097 stop = '\'';
4098 } else {
4099 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4100 return(NULL);
4101 }
4102 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4103 if (buf == NULL) {
4104 xmlErrMemory(ctxt, NULL);
4105 return(NULL);
4106 }
4107 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4108 cur = CUR;
4109 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4110 if (len + 1 >= size) {
4111 xmlChar *tmp;
4112
4113 size *= 2;
4114 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4115 if (tmp == NULL) {
4116 xmlErrMemory(ctxt, NULL);
4117 xmlFree(buf);
4118 return(NULL);
4119 }
4120 buf = tmp;
4121 }
4122 buf[len++] = cur;
4123 count++;
4124 if (count > 50) {
4125 GROW;
4126 count = 0;
4127 }
4128 NEXT;
4129 cur = CUR;
4130 if (cur == 0) {
4131 GROW;
4132 SHRINK;
4133 cur = CUR;
4134 }
4135 }
4136 buf[len] = 0;
4137 if (cur != stop) {
4138 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4139 } else {
4140 NEXT;
4141 }
4142 ctxt->instate = oldstate;
4143 return(buf);
4144 }
4145
/*
 * Forward declaration: xmlParseCharData() calls this fallback, which is
 * defined right after it.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4147
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed with a raw input byte (see the
 * "while (test_char_data[*in])" loop of xmlParseCharData()). A non zero
 * entry, which is the byte value itself, lets the loop step over the byte.
 * A zero entry stops the loop; that is the case for:
 *   - every byte below 0x20 except 0x09, so 0x0A and 0x0D stop the loop
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D)
 *   - every byte from 0x80 up (non-ASCII)
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4185
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the raw input bytes and hands runs
 * of them to the SAX callbacks straight from the input buffer. Anything
 * the fast path does not cover is left to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;                  /* scanning pointer of the fast path */
    int nbchar = 0;
    /* position restored before falling back to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
            /* step over a leading run of 0x20 and 0xA */
get_more_space:
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * the run is directly followed by '<': everything scanned so
             * far is made of 0x20 / 0xA only, deliver it and stop
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /* distinct callbacks: let areBlanks() choose */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

            /* step over the bytes test_char_data[] accepts */
get_more:
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error here; the cursor is left on it */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned since the cursor */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* starts with a blank: let areBlanks() choose */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * "\r\n": the cursor is put on the '\n', so the '\r'
                     * is not part of the next run
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone '\r': step back and leave it to the code below */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* SHRINK/GROW may have changed the buffer: reload the pointer */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* restore the last recorded position and use the complex parser */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4341
4342 /**
4343 * xmlParseCharDataComplex:
4344 * @ctxt: an XML parser context
4345 * @cdata: int indicating whether we are within a CDATA section
4346 *
4347 * parse a CharData section.this is the fallback function
4348 * of xmlParseCharData() when the parsing requires handling
4349 * of non-ASCII characters.
4350 */
4351 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4352 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4353 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4354 int nbchar = 0;
4355 int cur, l;
4356 int count = 0;
4357
4358 SHRINK;
4359 GROW;
4360 cur = CUR_CHAR(l);
4361 while ((cur != '<') && /* checked */
4362 (cur != '&') &&
4363 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4364 if ((cur == ']') && (NXT(1) == ']') &&
4365 (NXT(2) == '>')) {
4366 if (cdata) break;
4367 else {
4368 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4369 }
4370 }
4371 COPY_BUF(l,buf,nbchar,cur);
4372 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4373 buf[nbchar] = 0;
4374
4375 /*
4376 * OK the segment is to be consumed as chars.
4377 */
4378 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4379 if (areBlanks(ctxt, buf, nbchar, 0)) {
4380 if (ctxt->sax->ignorableWhitespace != NULL)
4381 ctxt->sax->ignorableWhitespace(ctxt->userData,
4382 buf, nbchar);
4383 } else {
4384 if (ctxt->sax->characters != NULL)
4385 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4386 if ((ctxt->sax->characters !=
4387 ctxt->sax->ignorableWhitespace) &&
4388 (*ctxt->space == -1))
4389 *ctxt->space = -2;
4390 }
4391 }
4392 nbchar = 0;
4393 /* something really bad happened in the SAX callback */
4394 if (ctxt->instate != XML_PARSER_CONTENT)
4395 return;
4396 }
4397 count++;
4398 if (count > 50) {
4399 GROW;
4400 count = 0;
4401 }
4402 NEXTL(l);
4403 cur = CUR_CHAR(l);
4404 }
4405 if (nbchar != 0) {
4406 buf[nbchar] = 0;
4407 /*
4408 * OK the segment is to be consumed as chars.
4409 */
4410 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4411 if (areBlanks(ctxt, buf, nbchar, 0)) {
4412 if (ctxt->sax->ignorableWhitespace != NULL)
4413 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4414 } else {
4415 if (ctxt->sax->characters != NULL)
4416 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4417 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418 (*ctxt->space == -1))
4419 *ctxt->space = -2;
4420 }
4421 }
4422 }
4423 if ((cur != 0) && (!IS_CHAR(cur))) {
4424 /* Generate the error and skip the offending character */
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "PCDATA invalid Char value %d\n",
4427 cur);
4428 NEXTL(l);
4429 }
4430 }
4431
4432 /**
4433 * xmlParseExternalID:
4434 * @ctxt: an XML parser context
4435 * @publicID: a xmlChar** receiving PubidLiteral
4436 * @strict: indicate whether we should restrict parsing to only
4437 * production [75], see NOTE below
4438 *
4439 * Parse an External ID or a Public ID
4440 *
4441 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4442 * 'PUBLIC' S PubidLiteral S SystemLiteral
4443 *
4444 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4446 *
4447 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4448 *
4449 * Returns the function returns SystemLiteral and in the second
4450 * case publicID receives PubidLiteral, is strict is off
4451 * it is possible to return NULL and have publicID set.
4452 */
4453
4454 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4455 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456 xmlChar *URI = NULL;
4457
4458 SHRINK;
4459
4460 *publicID = NULL;
4461 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4462 SKIP(6);
4463 if (!IS_BLANK_CH(CUR)) {
4464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465 "Space required after 'SYSTEM'\n");
4466 }
4467 SKIP_BLANKS;
4468 URI = xmlParseSystemLiteral(ctxt);
4469 if (URI == NULL) {
4470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4471 }
4472 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4473 SKIP(6);
4474 if (!IS_BLANK_CH(CUR)) {
4475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476 "Space required after 'PUBLIC'\n");
4477 }
4478 SKIP_BLANKS;
4479 *publicID = xmlParsePubidLiteral(ctxt);
4480 if (*publicID == NULL) {
4481 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4482 }
4483 if (strict) {
4484 /*
4485 * We don't handle [83] so "S SystemLiteral" is required.
4486 */
4487 if (!IS_BLANK_CH(CUR)) {
4488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after the Public Identifier\n");
4490 }
4491 } else {
4492 /*
4493 * We handle [83] so we return immediately, if
4494 * "S SystemLiteral" is not detected. From a purely parsing
4495 * point of view that's a nice mess.
4496 */
4497 const xmlChar *ptr;
4498 GROW;
4499
4500 ptr = CUR_PTR;
4501 if (!IS_BLANK_CH(*ptr)) return(NULL);
4502
4503 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4504 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4505 }
4506 SKIP_BLANKS;
4507 URI = xmlParseSystemLiteral(ctxt);
4508 if (URI == NULL) {
4509 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4510 }
4511 }
4512 return(URI);
4513 }
4514
4515 /**
4516 * xmlParseCommentComplex:
4517 * @ctxt: an XML parser context
4518 * @buf: the already parsed part of the buffer
4519 * @len: number of bytes filles in the buffer
4520 * @size: allocated size of the buffer
4521 *
4522 * Skip an XML (SGML) comment <!-- .... -->
4523 * The spec says that "For compatibility, the string "--" (double-hyphen)
4524 * must not occur within comments. "
4525 * This is the slow routine in case the accelerator for ascii didn't work
4526 *
4527 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4528 */
4529 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4530 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4531 int q, ql;
4532 int r, rl;
4533 int cur, l;
4534 int count = 0;
4535 int inputid;
4536
4537 inputid = ctxt->input->id;
4538
4539 if (buf == NULL) {
4540 len = 0;
4541 size = XML_PARSER_BUFFER_SIZE;
4542 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4543 if (buf == NULL) {
4544 xmlErrMemory(ctxt, NULL);
4545 return;
4546 }
4547 }
4548 GROW; /* Assure there's enough input data */
4549 q = CUR_CHAR(ql);
4550 if (q == 0)
4551 goto not_terminated;
4552 if (!IS_CHAR(q)) {
4553 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554 "xmlParseComment: invalid xmlChar value %d\n",
4555 q);
4556 xmlFree (buf);
4557 return;
4558 }
4559 NEXTL(ql);
4560 r = CUR_CHAR(rl);
4561 if (r == 0)
4562 goto not_terminated;
4563 if (!IS_CHAR(r)) {
4564 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565 "xmlParseComment: invalid xmlChar value %d\n",
4566 q);
4567 xmlFree (buf);
4568 return;
4569 }
4570 NEXTL(rl);
4571 cur = CUR_CHAR(l);
4572 if (cur == 0)
4573 goto not_terminated;
4574 while (IS_CHAR(cur) && /* checked */
4575 ((cur != '>') ||
4576 (r != '-') || (q != '-'))) {
4577 if ((r == '-') && (q == '-')) {
4578 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4579 }
4580 if (len + 5 >= size) {
4581 xmlChar *new_buf;
4582 size *= 2;
4583 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584 if (new_buf == NULL) {
4585 xmlFree (buf);
4586 xmlErrMemory(ctxt, NULL);
4587 return;
4588 }
4589 buf = new_buf;
4590 }
4591 COPY_BUF(ql,buf,len,q);
4592 q = r;
4593 ql = rl;
4594 r = cur;
4595 rl = l;
4596
4597 count++;
4598 if (count > 50) {
4599 GROW;
4600 count = 0;
4601 }
4602 NEXTL(l);
4603 cur = CUR_CHAR(l);
4604 if (cur == 0) {
4605 SHRINK;
4606 GROW;
4607 cur = CUR_CHAR(l);
4608 }
4609 }
4610 buf[len] = 0;
4611 if (cur == 0) {
4612 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613 "Comment not terminated \n<!--%.50s\n", buf);
4614 } else if (!IS_CHAR(cur)) {
4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616 "xmlParseComment: invalid xmlChar value %d\n",
4617 cur);
4618 } else {
4619 if (inputid != ctxt->input->id) {
4620 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621 "Comment doesn't start and stop in the same entity\n");
4622 }
4623 NEXT;
4624 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625 (!ctxt->disableSAX))
4626 ctxt->sax->comment(ctxt->userData, buf);
4627 }
4628 xmlFree(buf);
4629 return;
4630 not_terminated:
4631 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632 "Comment not terminated\n", NULL);
4633 xmlFree(buf);
4634 return;
4635 }
4636
4637 /**
4638 * xmlParseComment:
4639 * @ctxt: an XML parser context
4640 *
4641 * Skip an XML (SGML) comment <!-- .... -->
4642 * The spec says that "For compatibility, the string "--" (double-hyphen)
4643 * must not occur within comments. "
4644 *
4645 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4646 */
4647 void
xmlParseComment(xmlParserCtxtPtr ctxt)4648 xmlParseComment(xmlParserCtxtPtr ctxt) {
4649 xmlChar *buf = NULL;
4650 int size = XML_PARSER_BUFFER_SIZE;
4651 int len = 0;
4652 xmlParserInputState state;
4653 const xmlChar *in;
4654 int nbchar = 0, ccol;
4655 int inputid;
4656
4657 /*
4658 * Check that there is a comment right here.
4659 */
4660 if ((RAW != '<') || (NXT(1) != '!') ||
4661 (NXT(2) != '-') || (NXT(3) != '-')) return;
4662 state = ctxt->instate;
4663 ctxt->instate = XML_PARSER_COMMENT;
4664 inputid = ctxt->input->id;
4665 SKIP(4);
4666 SHRINK;
4667 GROW;
4668
4669 /*
4670 * Accelerated common case where input don't need to be
4671 * modified before passing it to the handler.
4672 */
4673 in = ctxt->input->cur;
4674 do {
4675 if (*in == 0xA) {
4676 do {
4677 ctxt->input->line++; ctxt->input->col = 1;
4678 in++;
4679 } while (*in == 0xA);
4680 }
4681 get_more:
4682 ccol = ctxt->input->col;
4683 while (((*in > '-') && (*in <= 0x7F)) ||
4684 ((*in >= 0x20) && (*in < '-')) ||
4685 (*in == 0x09)) {
4686 in++;
4687 ccol++;
4688 }
4689 ctxt->input->col = ccol;
4690 if (*in == 0xA) {
4691 do {
4692 ctxt->input->line++; ctxt->input->col = 1;
4693 in++;
4694 } while (*in == 0xA);
4695 goto get_more;
4696 }
4697 nbchar = in - ctxt->input->cur;
4698 /*
4699 * save current set of data
4700 */
4701 if (nbchar > 0) {
4702 if ((ctxt->sax != NULL) &&
4703 (ctxt->sax->comment != NULL)) {
4704 if (buf == NULL) {
4705 if ((*in == '-') && (in[1] == '-'))
4706 size = nbchar + 1;
4707 else
4708 size = XML_PARSER_BUFFER_SIZE + nbchar;
4709 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4710 if (buf == NULL) {
4711 xmlErrMemory(ctxt, NULL);
4712 ctxt->instate = state;
4713 return;
4714 }
4715 len = 0;
4716 } else if (len + nbchar + 1 >= size) {
4717 xmlChar *new_buf;
4718 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4719 new_buf = (xmlChar *) xmlRealloc(buf,
4720 size * sizeof(xmlChar));
4721 if (new_buf == NULL) {
4722 xmlFree (buf);
4723 xmlErrMemory(ctxt, NULL);
4724 ctxt->instate = state;
4725 return;
4726 }
4727 buf = new_buf;
4728 }
4729 memcpy(&buf[len], ctxt->input->cur, nbchar);
4730 len += nbchar;
4731 buf[len] = 0;
4732 }
4733 }
4734 ctxt->input->cur = in;
4735 if (*in == 0xA) {
4736 in++;
4737 ctxt->input->line++; ctxt->input->col = 1;
4738 }
4739 if (*in == 0xD) {
4740 in++;
4741 if (*in == 0xA) {
4742 ctxt->input->cur = in;
4743 in++;
4744 ctxt->input->line++; ctxt->input->col = 1;
4745 continue; /* while */
4746 }
4747 in--;
4748 }
4749 SHRINK;
4750 GROW;
4751 in = ctxt->input->cur;
4752 if (*in == '-') {
4753 if (in[1] == '-') {
4754 if (in[2] == '>') {
4755 if (ctxt->input->id != inputid) {
4756 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4757 "comment doesn't start and stop in the same entity\n");
4758 }
4759 SKIP(3);
4760 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4761 (!ctxt->disableSAX)) {
4762 if (buf != NULL)
4763 ctxt->sax->comment(ctxt->userData, buf);
4764 else
4765 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4766 }
4767 if (buf != NULL)
4768 xmlFree(buf);
4769 ctxt->instate = state;
4770 return;
4771 }
4772 if (buf != NULL)
4773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774 "Comment not terminated \n<!--%.50s\n",
4775 buf);
4776 else
4777 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4778 "Comment not terminated \n", NULL);
4779 in++;
4780 ctxt->input->col++;
4781 }
4782 in++;
4783 ctxt->input->col++;
4784 goto get_more;
4785 }
4786 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4787 xmlParseCommentComplex(ctxt, buf, len, size);
4788 ctxt->instate = state;
4789 return;
4790 }
4791
4792
4793 /**
4794 * xmlParsePITarget:
4795 * @ctxt: an XML parser context
4796 *
4797 * parse the name of a PI
4798 *
4799 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4800 *
4801 * Returns the PITarget name or NULL
4802 */
4803
4804 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4805 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4806 const xmlChar *name;
4807
4808 name = xmlParseName(ctxt);
4809 if ((name != NULL) &&
4810 ((name[0] == 'x') || (name[0] == 'X')) &&
4811 ((name[1] == 'm') || (name[1] == 'M')) &&
4812 ((name[2] == 'l') || (name[2] == 'L'))) {
4813 int i;
4814 if ((name[0] == 'x') && (name[1] == 'm') &&
4815 (name[2] == 'l') && (name[3] == 0)) {
4816 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4817 "XML declaration allowed only at the start of the document\n");
4818 return(name);
4819 } else if (name[3] == 0) {
4820 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4821 return(name);
4822 }
4823 for (i = 0;;i++) {
4824 if (xmlW3CPIs[i] == NULL) break;
4825 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4826 return(name);
4827 }
4828 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829 "xmlParsePITarget: invalid name prefix 'xml'\n",
4830 NULL, NULL);
4831 }
4832 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4835 }
4836 return(name);
4837 }
4838
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is added to the catalogs of the parsing context so
 * that it can be used if a resolution is needed further down in the
 * document.
 *
 * The expected content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a quoted value and optional
 * trailing blanks. A missing '=' makes the function return silently,
 * any other deviation raises the XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *location = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* the value is delimited by matching single or double quotes */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    location = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
4900
4901 /**
4902 * xmlParsePI:
4903 * @ctxt: an XML parser context
4904 *
4905 * parse an XML Processing Instruction.
4906 *
4907 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4908 *
4909 * The processing is transfered to SAX once parsed.
4910 */
4911
4912 void
xmlParsePI(xmlParserCtxtPtr ctxt)4913 xmlParsePI(xmlParserCtxtPtr ctxt) {
4914 xmlChar *buf = NULL;
4915 int len = 0;
4916 int size = XML_PARSER_BUFFER_SIZE;
4917 int cur, l;
4918 const xmlChar *target;
4919 xmlParserInputState state;
4920 int count = 0;
4921
4922 if ((RAW == '<') && (NXT(1) == '?')) {
4923 xmlParserInputPtr input = ctxt->input;
4924 state = ctxt->instate;
4925 ctxt->instate = XML_PARSER_PI;
4926 /*
4927 * this is a Processing Instruction.
4928 */
4929 SKIP(2);
4930 SHRINK;
4931
4932 /*
4933 * Parse the target name and check for special support like
4934 * namespace.
4935 */
4936 target = xmlParsePITarget(ctxt);
4937 if (target != NULL) {
4938 if ((RAW == '?') && (NXT(1) == '>')) {
4939 if (input != ctxt->input) {
4940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941 "PI declaration doesn't start and stop in the same entity\n");
4942 }
4943 SKIP(2);
4944
4945 /*
4946 * SAX: PI detected.
4947 */
4948 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->processingInstruction != NULL))
4950 ctxt->sax->processingInstruction(ctxt->userData,
4951 target, NULL);
4952 ctxt->instate = state;
4953 return;
4954 }
4955 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4956 if (buf == NULL) {
4957 xmlErrMemory(ctxt, NULL);
4958 ctxt->instate = state;
4959 return;
4960 }
4961 cur = CUR;
4962 if (!IS_BLANK(cur)) {
4963 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964 "ParsePI: PI %s space expected\n", target);
4965 }
4966 SKIP_BLANKS;
4967 cur = CUR_CHAR(l);
4968 while (IS_CHAR(cur) && /* checked */
4969 ((cur != '?') || (NXT(1) != '>'))) {
4970 if (len + 5 >= size) {
4971 xmlChar *tmp;
4972
4973 size *= 2;
4974 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4975 if (tmp == NULL) {
4976 xmlErrMemory(ctxt, NULL);
4977 xmlFree(buf);
4978 ctxt->instate = state;
4979 return;
4980 }
4981 buf = tmp;
4982 }
4983 count++;
4984 if (count > 50) {
4985 GROW;
4986 count = 0;
4987 }
4988 COPY_BUF(l,buf,len,cur);
4989 NEXTL(l);
4990 cur = CUR_CHAR(l);
4991 if (cur == 0) {
4992 SHRINK;
4993 GROW;
4994 cur = CUR_CHAR(l);
4995 }
4996 }
4997 buf[len] = 0;
4998 if (cur != '?') {
4999 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000 "ParsePI: PI %s never end ...\n", target);
5001 } else {
5002 if (input != ctxt->input) {
5003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004 "PI declaration doesn't start and stop in the same entity\n");
5005 }
5006 SKIP(2);
5007
5008 #ifdef LIBXML_CATALOG_ENABLED
5009 if (((state == XML_PARSER_MISC) ||
5010 (state == XML_PARSER_START)) &&
5011 (xmlStrEqual(target, XML_CATALOG_PI))) {
5012 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014 (allow == XML_CATA_ALLOW_ALL))
5015 xmlParseCatalogPI(ctxt, buf);
5016 }
5017 #endif
5018
5019
5020 /*
5021 * SAX: PI detected.
5022 */
5023 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024 (ctxt->sax->processingInstruction != NULL))
5025 ctxt->sax->processingInstruction(ctxt->userData,
5026 target, buf);
5027 }
5028 xmlFree(buf);
5029 } else {
5030 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5031 }
5032 ctxt->instate = state;
5033 }
5034 }
5035
5036 /**
5037 * xmlParseNotationDecl:
5038 * @ctxt: an XML parser context
5039 *
5040 * parse a notation declaration
5041 *
5042 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5043 *
5044 * Hence there is actually 3 choices:
5045 * 'PUBLIC' S PubidLiteral
5046 * 'PUBLIC' S PubidLiteral S SystemLiteral
5047 * and 'SYSTEM' S SystemLiteral
5048 *
5049 * See the NOTE on xmlParseExternalID().
5050 */
5051
5052 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5053 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5054 const xmlChar *name;
5055 xmlChar *Pubid;
5056 xmlChar *Systemid;
5057
5058 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5059 xmlParserInputPtr input = ctxt->input;
5060 SHRINK;
5061 SKIP(10);
5062 if (!IS_BLANK_CH(CUR)) {
5063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064 "Space required after '<!NOTATION'\n");
5065 return;
5066 }
5067 SKIP_BLANKS;
5068
5069 name = xmlParseName(ctxt);
5070 if (name == NULL) {
5071 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5072 return;
5073 }
5074 if (!IS_BLANK_CH(CUR)) {
5075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076 "Space required after the NOTATION name'\n");
5077 return;
5078 }
5079 if (xmlStrchr(name, ':') != NULL) {
5080 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081 "colon are forbidden from notation names '%s'\n",
5082 name, NULL, NULL);
5083 }
5084 SKIP_BLANKS;
5085
5086 /*
5087 * Parse the IDs.
5088 */
5089 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5090 SKIP_BLANKS;
5091
5092 if (RAW == '>') {
5093 if (input != ctxt->input) {
5094 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095 "Notation declaration doesn't start and stop in the same entity\n");
5096 }
5097 NEXT;
5098 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099 (ctxt->sax->notationDecl != NULL))
5100 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5101 } else {
5102 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5103 }
5104 if (Systemid != NULL) xmlFree(Systemid);
5105 if (Pubid != NULL) xmlFree(Pubid);
5106 }
5107 }
5108
5109 /**
5110 * xmlParseEntityDecl:
5111 * @ctxt: an XML parser context
5112 *
5113 * parse <!ENTITY declarations
5114 *
5115 * [70] EntityDecl ::= GEDecl | PEDecl
5116 *
5117 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5118 *
5119 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5120 *
5121 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5122 *
5123 * [74] PEDef ::= EntityValue | ExternalID
5124 *
5125 * [76] NDataDecl ::= S 'NDATA' S Name
5126 *
5127 * [ VC: Notation Declared ]
5128 * The Name must match the declared name of a notation.
5129 */
5130
5131 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5132 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5133 const xmlChar *name = NULL;
5134 xmlChar *value = NULL;
5135 xmlChar *URI = NULL, *literal = NULL;
5136 const xmlChar *ndata = NULL;
5137 int isParameter = 0;
5138 xmlChar *orig = NULL;
5139 int skipped;
5140
5141 /* GROW; done in the caller */
5142 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5143 xmlParserInputPtr input = ctxt->input;
5144 SHRINK;
5145 SKIP(8);
5146 skipped = SKIP_BLANKS;
5147 if (skipped == 0) {
5148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "Space required after '<!ENTITY'\n");
5150 }
5151
5152 if (RAW == '%') {
5153 NEXT;
5154 skipped = SKIP_BLANKS;
5155 if (skipped == 0) {
5156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157 "Space required after '%'\n");
5158 }
5159 isParameter = 1;
5160 }
5161
5162 name = xmlParseName(ctxt);
5163 if (name == NULL) {
5164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165 "xmlParseEntityDecl: no name\n");
5166 return;
5167 }
5168 if (xmlStrchr(name, ':') != NULL) {
5169 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170 "colon are forbidden from entities names '%s'\n",
5171 name, NULL, NULL);
5172 }
5173 skipped = SKIP_BLANKS;
5174 if (skipped == 0) {
5175 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 "Space required after the entity name\n");
5177 }
5178
5179 ctxt->instate = XML_PARSER_ENTITY_DECL;
5180 /*
5181 * handle the various case of definitions...
5182 */
5183 if (isParameter) {
5184 if ((RAW == '"') || (RAW == '\'')) {
5185 value = xmlParseEntityValue(ctxt, &orig);
5186 if (value) {
5187 if ((ctxt->sax != NULL) &&
5188 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189 ctxt->sax->entityDecl(ctxt->userData, name,
5190 XML_INTERNAL_PARAMETER_ENTITY,
5191 NULL, NULL, value);
5192 }
5193 } else {
5194 URI = xmlParseExternalID(ctxt, &literal, 1);
5195 if ((URI == NULL) && (literal == NULL)) {
5196 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5197 }
5198 if (URI) {
5199 xmlURIPtr uri;
5200
5201 uri = xmlParseURI((const char *) URI);
5202 if (uri == NULL) {
5203 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204 "Invalid URI: %s\n", URI);
5205 /*
5206 * This really ought to be a well formedness error
5207 * but the XML Core WG decided otherwise c.f. issue
5208 * E26 of the XML erratas.
5209 */
5210 } else {
5211 if (uri->fragment != NULL) {
5212 /*
5213 * Okay this is foolish to block those but not
5214 * invalid URIs.
5215 */
5216 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5217 } else {
5218 if ((ctxt->sax != NULL) &&
5219 (!ctxt->disableSAX) &&
5220 (ctxt->sax->entityDecl != NULL))
5221 ctxt->sax->entityDecl(ctxt->userData, name,
5222 XML_EXTERNAL_PARAMETER_ENTITY,
5223 literal, URI, NULL);
5224 }
5225 xmlFreeURI(uri);
5226 }
5227 }
5228 }
5229 } else {
5230 if ((RAW == '"') || (RAW == '\'')) {
5231 value = xmlParseEntityValue(ctxt, &orig);
5232 if ((ctxt->sax != NULL) &&
5233 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234 ctxt->sax->entityDecl(ctxt->userData, name,
5235 XML_INTERNAL_GENERAL_ENTITY,
5236 NULL, NULL, value);
5237 /*
5238 * For expat compatibility in SAX mode.
5239 */
5240 if ((ctxt->myDoc == NULL) ||
5241 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242 if (ctxt->myDoc == NULL) {
5243 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5244 if (ctxt->myDoc == NULL) {
5245 xmlErrMemory(ctxt, "New Doc failed");
5246 return;
5247 }
5248 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5249 }
5250 if (ctxt->myDoc->intSubset == NULL)
5251 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252 BAD_CAST "fake", NULL, NULL);
5253
5254 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5255 NULL, NULL, value);
5256 }
5257 } else {
5258 URI = xmlParseExternalID(ctxt, &literal, 1);
5259 if ((URI == NULL) && (literal == NULL)) {
5260 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5261 }
5262 if (URI) {
5263 xmlURIPtr uri;
5264
5265 uri = xmlParseURI((const char *)URI);
5266 if (uri == NULL) {
5267 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268 "Invalid URI: %s\n", URI);
5269 /*
5270 * This really ought to be a well formedness error
5271 * but the XML Core WG decided otherwise c.f. issue
5272 * E26 of the XML erratas.
5273 */
5274 } else {
5275 if (uri->fragment != NULL) {
5276 /*
5277 * Okay this is foolish to block those but not
5278 * invalid URIs.
5279 */
5280 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5281 }
5282 xmlFreeURI(uri);
5283 }
5284 }
5285 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5286 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287 "Space required before 'NDATA'\n");
5288 }
5289 SKIP_BLANKS;
5290 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5291 SKIP(5);
5292 if (!IS_BLANK_CH(CUR)) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 "Space required after 'NDATA'\n");
5295 }
5296 SKIP_BLANKS;
5297 ndata = xmlParseName(ctxt);
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->unparsedEntityDecl != NULL))
5300 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301 literal, URI, ndata);
5302 } else {
5303 if ((ctxt->sax != NULL) &&
5304 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305 ctxt->sax->entityDecl(ctxt->userData, name,
5306 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307 literal, URI, NULL);
5308 /*
5309 * For expat compatibility in SAX mode.
5310 * assuming the entity repalcement was asked for
5311 */
5312 if ((ctxt->replaceEntities != 0) &&
5313 ((ctxt->myDoc == NULL) ||
5314 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315 if (ctxt->myDoc == NULL) {
5316 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5317 if (ctxt->myDoc == NULL) {
5318 xmlErrMemory(ctxt, "New Doc failed");
5319 return;
5320 }
5321 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5322 }
5323
5324 if (ctxt->myDoc->intSubset == NULL)
5325 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326 BAD_CAST "fake", NULL, NULL);
5327 xmlSAX2EntityDecl(ctxt, name,
5328 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329 literal, URI, NULL);
5330 }
5331 }
5332 }
5333 }
5334 SKIP_BLANKS;
5335 if (RAW != '>') {
5336 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5337 "xmlParseEntityDecl: entity %s not terminated\n", name);
5338 } else {
5339 if (input != ctxt->input) {
5340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341 "Entity declaration doesn't start and stop in the same entity\n");
5342 }
5343 NEXT;
5344 }
5345 if (orig != NULL) {
5346 /*
5347 * Ugly mechanism to save the raw entity value.
5348 */
5349 xmlEntityPtr cur = NULL;
5350
5351 if (isParameter) {
5352 if ((ctxt->sax != NULL) &&
5353 (ctxt->sax->getParameterEntity != NULL))
5354 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5355 } else {
5356 if ((ctxt->sax != NULL) &&
5357 (ctxt->sax->getEntity != NULL))
5358 cur = ctxt->sax->getEntity(ctxt->userData, name);
5359 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5360 cur = xmlSAX2GetEntity(ctxt, name);
5361 }
5362 }
5363 if (cur != NULL) {
5364 if (cur->orig != NULL)
5365 xmlFree(orig);
5366 else
5367 cur->orig = orig;
5368 } else
5369 xmlFree(orig);
5370 }
5371 if (value != NULL) xmlFree(value);
5372 if (URI != NULL) xmlFree(URI);
5373 if (literal != NULL) xmlFree(literal);
5374 }
5375 }
5376
5377 /**
5378 * xmlParseDefaultDecl:
5379 * @ctxt: an XML parser context
5380 * @value: Receive a possible fixed default value for the attribute
5381 *
5382 * Parse an attribute default declaration
5383 *
5384 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5385 *
5386 * [ VC: Required Attribute ]
5387 * if the default declaration is the keyword #REQUIRED, then the
5388 * attribute must be specified for all elements of the type in the
5389 * attribute-list declaration.
5390 *
5391 * [ VC: Attribute Default Legal ]
5392 * The declared default value must meet the lexical constraints of
5393 * the declared attribute type c.f. xmlValidateAttributeDecl()
5394 *
5395 * [ VC: Fixed Attribute Default ]
5396 * if an attribute has a default value declared with the #FIXED
5397 * keyword, instances of that attribute must match the default value.
5398 *
5399 * [ WFC: No < in Attribute Values ]
5400 * handled in xmlParseAttValue()
5401 *
5402 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403 * or XML_ATTRIBUTE_FIXED.
5404 */
5405
5406 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5407 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5408 int val;
5409 xmlChar *ret;
5410
5411 *value = NULL;
5412 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5413 SKIP(9);
5414 return(XML_ATTRIBUTE_REQUIRED);
5415 }
5416 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5417 SKIP(8);
5418 return(XML_ATTRIBUTE_IMPLIED);
5419 }
5420 val = XML_ATTRIBUTE_NONE;
5421 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5422 SKIP(6);
5423 val = XML_ATTRIBUTE_FIXED;
5424 if (!IS_BLANK_CH(CUR)) {
5425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426 "Space required after '#FIXED'\n");
5427 }
5428 SKIP_BLANKS;
5429 }
5430 ret = xmlParseAttValue(ctxt);
5431 ctxt->instate = XML_PARSER_DTD;
5432 if (ret == NULL) {
5433 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5434 "Attribute default value declaration error\n");
5435 } else
5436 *value = ret;
5437 return(val);
5438 }
5439
5440 /**
5441 * xmlParseNotationType:
5442 * @ctxt: an XML parser context
5443 *
5444 * parse an Notation attribute type.
5445 *
5446 * Note: the leading 'NOTATION' S part has already being parsed...
5447 *
5448 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5449 *
5450 * [ VC: Notation Attributes ]
5451 * Values of this type must match one of the notation names included
5452 * in the declaration; all notation names in the declaration must be declared.
5453 *
5454 * Returns: the notation attribute tree built while parsing
5455 */
5456
5457 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5458 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5459 const xmlChar *name;
5460 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5461
5462 if (RAW != '(') {
5463 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5464 return(NULL);
5465 }
5466 SHRINK;
5467 do {
5468 NEXT;
5469 SKIP_BLANKS;
5470 name = xmlParseName(ctxt);
5471 if (name == NULL) {
5472 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473 "Name expected in NOTATION declaration\n");
5474 xmlFreeEnumeration(ret);
5475 return(NULL);
5476 }
5477 tmp = ret;
5478 while (tmp != NULL) {
5479 if (xmlStrEqual(name, tmp->name)) {
5480 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481 "standalone: attribute notation value token %s duplicated\n",
5482 name, NULL);
5483 if (!xmlDictOwns(ctxt->dict, name))
5484 xmlFree((xmlChar *) name);
5485 break;
5486 }
5487 tmp = tmp->next;
5488 }
5489 if (tmp == NULL) {
5490 cur = xmlCreateEnumeration(name);
5491 if (cur == NULL) {
5492 xmlFreeEnumeration(ret);
5493 return(NULL);
5494 }
5495 if (last == NULL) ret = last = cur;
5496 else {
5497 last->next = cur;
5498 last = cur;
5499 }
5500 }
5501 SKIP_BLANKS;
5502 } while (RAW == '|');
5503 if (RAW != ')') {
5504 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5505 xmlFreeEnumeration(ret);
5506 return(NULL);
5507 }
5508 NEXT;
5509 return(ret);
5510 }
5511
5512 /**
5513 * xmlParseEnumerationType:
5514 * @ctxt: an XML parser context
5515 *
5516 * parse an Enumeration attribute type.
5517 *
5518 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5519 *
5520 * [ VC: Enumeration ]
5521 * Values of this type must match one of the Nmtoken tokens in
5522 * the declaration
5523 *
5524 * Returns: the enumeration attribute tree built while parsing
5525 */
5526
5527 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5528 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5529 xmlChar *name;
5530 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5531
5532 if (RAW != '(') {
5533 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5534 return(NULL);
5535 }
5536 SHRINK;
5537 do {
5538 NEXT;
5539 SKIP_BLANKS;
5540 name = xmlParseNmtoken(ctxt);
5541 if (name == NULL) {
5542 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5543 return(ret);
5544 }
5545 tmp = ret;
5546 while (tmp != NULL) {
5547 if (xmlStrEqual(name, tmp->name)) {
5548 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549 "standalone: attribute enumeration value token %s duplicated\n",
5550 name, NULL);
5551 if (!xmlDictOwns(ctxt->dict, name))
5552 xmlFree(name);
5553 break;
5554 }
5555 tmp = tmp->next;
5556 }
5557 if (tmp == NULL) {
5558 cur = xmlCreateEnumeration(name);
5559 if (!xmlDictOwns(ctxt->dict, name))
5560 xmlFree(name);
5561 if (cur == NULL) {
5562 xmlFreeEnumeration(ret);
5563 return(NULL);
5564 }
5565 if (last == NULL) ret = last = cur;
5566 else {
5567 last->next = cur;
5568 last = cur;
5569 }
5570 }
5571 SKIP_BLANKS;
5572 } while (RAW == '|');
5573 if (RAW != ')') {
5574 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5575 return(ret);
5576 }
5577 NEXT;
5578 return(ret);
5579 }
5580
5581 /**
5582 * xmlParseEnumeratedType:
5583 * @ctxt: an XML parser context
5584 * @tree: the enumeration tree built while parsing
5585 *
5586 * parse an Enumerated attribute type.
5587 *
5588 * [57] EnumeratedType ::= NotationType | Enumeration
5589 *
5590 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5591 *
5592 *
5593 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5594 */
5595
5596 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5597 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5598 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5599 SKIP(8);
5600 if (!IS_BLANK_CH(CUR)) {
5601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602 "Space required after 'NOTATION'\n");
5603 return(0);
5604 }
5605 SKIP_BLANKS;
5606 *tree = xmlParseNotationType(ctxt);
5607 if (*tree == NULL) return(0);
5608 return(XML_ATTRIBUTE_NOTATION);
5609 }
5610 *tree = xmlParseEnumerationType(ctxt);
5611 if (*tree == NULL) return(0);
5612 return(XML_ATTRIBUTE_ENUMERATION);
5613 }
5614
5615 /**
5616 * xmlParseAttributeType:
5617 * @ctxt: an XML parser context
5618 * @tree: the enumeration tree built while parsing
5619 *
5620 * parse the Attribute list def for an element
5621 *
5622 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5623 *
5624 * [55] StringType ::= 'CDATA'
5625 *
5626 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5628 *
5629 * Validity constraints for attribute values syntax are checked in
5630 * xmlValidateAttributeValue()
5631 *
5632 * [ VC: ID ]
5633 * Values of type ID must match the Name production. A name must not
5634 * appear more than once in an XML document as a value of this type;
5635 * i.e., ID values must uniquely identify the elements which bear them.
5636 *
5637 * [ VC: One ID per Element Type ]
5638 * No element type may have more than one ID attribute specified.
5639 *
5640 * [ VC: ID Attribute Default ]
5641 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5642 *
5643 * [ VC: IDREF ]
5644 * Values of type IDREF must match the Name production, and values
5645 * of type IDREFS must match Names; each IDREF Name must match the value
5646 * of an ID attribute on some element in the XML document; i.e. IDREF
5647 * values must match the value of some ID attribute.
5648 *
5649 * [ VC: Entity Name ]
5650 * Values of type ENTITY must match the Name production, values
5651 * of type ENTITIES must match Names; each Entity Name must match the
5652 * name of an unparsed entity declared in the DTD.
5653 *
5654 * [ VC: Name Token ]
5655 * Values of type NMTOKEN must match the Nmtoken production; values
5656 * of type NMTOKENS must match Nmtokens.
5657 *
5658 * Returns the attribute type
5659 */
5660 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5661 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5662 SHRINK;
5663 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5664 SKIP(5);
5665 return(XML_ATTRIBUTE_CDATA);
5666 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5667 SKIP(6);
5668 return(XML_ATTRIBUTE_IDREFS);
5669 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5670 SKIP(5);
5671 return(XML_ATTRIBUTE_IDREF);
5672 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5673 SKIP(2);
5674 return(XML_ATTRIBUTE_ID);
5675 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5676 SKIP(6);
5677 return(XML_ATTRIBUTE_ENTITY);
5678 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5679 SKIP(8);
5680 return(XML_ATTRIBUTE_ENTITIES);
5681 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5682 SKIP(8);
5683 return(XML_ATTRIBUTE_NMTOKENS);
5684 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5685 SKIP(7);
5686 return(XML_ATTRIBUTE_NMTOKEN);
5687 }
5688 return(xmlParseEnumeratedType(ctxt, tree));
5689 }
5690
5691 /**
5692 * xmlParseAttributeListDecl:
5693 * @ctxt: an XML parser context
5694 *
5695 * : parse the Attribute list def for an element
5696 *
5697 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5698 *
5699 * [53] AttDef ::= S Name S AttType S DefaultDecl
5700 *
5701 */
5702 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5703 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5704 const xmlChar *elemName;
5705 const xmlChar *attrName;
5706 xmlEnumerationPtr tree;
5707
5708 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5709 xmlParserInputPtr input = ctxt->input;
5710
5711 SKIP(9);
5712 if (!IS_BLANK_CH(CUR)) {
5713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714 "Space required after '<!ATTLIST'\n");
5715 }
5716 SKIP_BLANKS;
5717 elemName = xmlParseName(ctxt);
5718 if (elemName == NULL) {
5719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720 "ATTLIST: no name for Element\n");
5721 return;
5722 }
5723 SKIP_BLANKS;
5724 GROW;
5725 while (RAW != '>') {
5726 const xmlChar *check = CUR_PTR;
5727 int type;
5728 int def;
5729 xmlChar *defaultValue = NULL;
5730
5731 GROW;
5732 tree = NULL;
5733 attrName = xmlParseName(ctxt);
5734 if (attrName == NULL) {
5735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736 "ATTLIST: no name for Attribute\n");
5737 break;
5738 }
5739 GROW;
5740 if (!IS_BLANK_CH(CUR)) {
5741 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5742 "Space required after the attribute name\n");
5743 break;
5744 }
5745 SKIP_BLANKS;
5746
5747 type = xmlParseAttributeType(ctxt, &tree);
5748 if (type <= 0) {
5749 break;
5750 }
5751
5752 GROW;
5753 if (!IS_BLANK_CH(CUR)) {
5754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755 "Space required after the attribute type\n");
5756 if (tree != NULL)
5757 xmlFreeEnumeration(tree);
5758 break;
5759 }
5760 SKIP_BLANKS;
5761
5762 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5763 if (def <= 0) {
5764 if (defaultValue != NULL)
5765 xmlFree(defaultValue);
5766 if (tree != NULL)
5767 xmlFreeEnumeration(tree);
5768 break;
5769 }
5770 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5772
5773 GROW;
5774 if (RAW != '>') {
5775 if (!IS_BLANK_CH(CUR)) {
5776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777 "Space required after the attribute default value\n");
5778 if (defaultValue != NULL)
5779 xmlFree(defaultValue);
5780 if (tree != NULL)
5781 xmlFreeEnumeration(tree);
5782 break;
5783 }
5784 SKIP_BLANKS;
5785 }
5786 if (check == CUR_PTR) {
5787 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788 "in xmlParseAttributeListDecl\n");
5789 if (defaultValue != NULL)
5790 xmlFree(defaultValue);
5791 if (tree != NULL)
5792 xmlFreeEnumeration(tree);
5793 break;
5794 }
5795 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796 (ctxt->sax->attributeDecl != NULL))
5797 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798 type, def, defaultValue, tree);
5799 else if (tree != NULL)
5800 xmlFreeEnumeration(tree);
5801
5802 if ((ctxt->sax2) && (defaultValue != NULL) &&
5803 (def != XML_ATTRIBUTE_IMPLIED) &&
5804 (def != XML_ATTRIBUTE_REQUIRED)) {
5805 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5806 }
5807 if (ctxt->sax2) {
5808 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5809 }
5810 if (defaultValue != NULL)
5811 xmlFree(defaultValue);
5812 GROW;
5813 }
5814 if (RAW == '>') {
5815 if (input != ctxt->input) {
5816 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817 "Attribute list declaration doesn't start and stop in the same entity\n",
5818 NULL, NULL);
5819 }
5820 NEXT;
5821 }
5822 }
5823 }
5824
5825 /**
5826 * xmlParseElementMixedContentDecl:
5827 * @ctxt: an XML parser context
5828 * @inputchk: the input used for the current entity, needed for boundary checks
5829 *
5830 * parse the declaration for a Mixed Element content
5831 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5832 *
5833 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834 * '(' S? '#PCDATA' S? ')'
5835 *
5836 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5837 *
5838 * [ VC: No Duplicate Types ]
5839 * The same name must not appear more than once in a single
5840 * mixed-content declaration.
5841 *
5842 * returns: the list of the xmlElementContentPtr describing the element choices
5843 */
5844 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5845 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5846 xmlElementContentPtr ret = NULL, cur = NULL, n;
5847 const xmlChar *elem = NULL;
5848
5849 GROW;
5850 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5851 SKIP(7);
5852 SKIP_BLANKS;
5853 SHRINK;
5854 if (RAW == ')') {
5855 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5856 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857 "Element content declaration doesn't start and stop in the same entity\n",
5858 NULL, NULL);
5859 }
5860 NEXT;
5861 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5862 if (ret == NULL)
5863 return(NULL);
5864 if (RAW == '*') {
5865 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5866 NEXT;
5867 }
5868 return(ret);
5869 }
5870 if ((RAW == '(') || (RAW == '|')) {
5871 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5872 if (ret == NULL) return(NULL);
5873 }
5874 while (RAW == '|') {
5875 NEXT;
5876 if (elem == NULL) {
5877 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5878 if (ret == NULL) return(NULL);
5879 ret->c1 = cur;
5880 if (cur != NULL)
5881 cur->parent = ret;
5882 cur = ret;
5883 } else {
5884 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5885 if (n == NULL) return(NULL);
5886 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5887 if (n->c1 != NULL)
5888 n->c1->parent = n;
5889 cur->c2 = n;
5890 if (n != NULL)
5891 n->parent = cur;
5892 cur = n;
5893 }
5894 SKIP_BLANKS;
5895 elem = xmlParseName(ctxt);
5896 if (elem == NULL) {
5897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5898 "xmlParseElementMixedContentDecl : Name expected\n");
5899 xmlFreeDocElementContent(ctxt->myDoc, cur);
5900 return(NULL);
5901 }
5902 SKIP_BLANKS;
5903 GROW;
5904 }
5905 if ((RAW == ')') && (NXT(1) == '*')) {
5906 if (elem != NULL) {
5907 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5908 XML_ELEMENT_CONTENT_ELEMENT);
5909 if (cur->c2 != NULL)
5910 cur->c2->parent = cur;
5911 }
5912 if (ret != NULL)
5913 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5914 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5915 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916 "Element content declaration doesn't start and stop in the same entity\n",
5917 NULL, NULL);
5918 }
5919 SKIP(2);
5920 } else {
5921 xmlFreeDocElementContent(ctxt->myDoc, ret);
5922 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5923 return(NULL);
5924 }
5925
5926 } else {
5927 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5928 }
5929 return(ret);
5930 }
5931
5932 /**
5933 * xmlParseElementChildrenContentDeclPriv:
5934 * @ctxt: an XML parser context
5935 * @inputchk: the input used for the current entity, needed for boundary checks
5936 * @depth: the level of recursion
5937 *
5938 * parse the declaration for a Mixed Element content
5939 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5940 *
5941 *
5942 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5943 *
5944 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5945 *
5946 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5947 *
5948 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5949 *
5950 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951 * TODO Parameter-entity replacement text must be properly nested
5952 * with parenthesized groups. That is to say, if either of the
5953 * opening or closing parentheses in a choice, seq, or Mixed
5954 * construct is contained in the replacement text for a parameter
5955 * entity, both must be contained in the same replacement text. For
5956 * interoperability, if a parameter-entity reference appears in a
5957 * choice, seq, or Mixed construct, its replacement text should not
5958 * be empty, and neither the first nor last non-blank character of
5959 * the replacement text should be a connector (| or ,).
5960 *
5961 * Returns the tree of xmlElementContentPtr describing the element
5962 * hierarchy.
5963 */
5964 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)5965 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5966 int depth) {
5967 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5968 const xmlChar *elem;
5969 xmlChar type = 0;
5970
5971 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5972 (depth > 2048)) {
5973 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5975 depth);
5976 return(NULL);
5977 }
5978 SKIP_BLANKS;
5979 GROW;
5980 if (RAW == '(') {
5981 int inputid = ctxt->input->id;
5982
5983 /* Recurse on first child */
5984 NEXT;
5985 SKIP_BLANKS;
5986 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5987 depth + 1);
5988 SKIP_BLANKS;
5989 GROW;
5990 } else {
5991 elem = xmlParseName(ctxt);
5992 if (elem == NULL) {
5993 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5994 return(NULL);
5995 }
5996 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5997 if (cur == NULL) {
5998 xmlErrMemory(ctxt, NULL);
5999 return(NULL);
6000 }
6001 GROW;
6002 if (RAW == '?') {
6003 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6004 NEXT;
6005 } else if (RAW == '*') {
6006 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6007 NEXT;
6008 } else if (RAW == '+') {
6009 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6010 NEXT;
6011 } else {
6012 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6013 }
6014 GROW;
6015 }
6016 SKIP_BLANKS;
6017 SHRINK;
6018 while (RAW != ')') {
6019 /*
6020 * Each loop we parse one separator and one element.
6021 */
6022 if (RAW == ',') {
6023 if (type == 0) type = CUR;
6024
6025 /*
6026 * Detect "Name | Name , Name" error
6027 */
6028 else if (type != CUR) {
6029 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6030 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6031 type);
6032 if ((last != NULL) && (last != ret))
6033 xmlFreeDocElementContent(ctxt->myDoc, last);
6034 if (ret != NULL)
6035 xmlFreeDocElementContent(ctxt->myDoc, ret);
6036 return(NULL);
6037 }
6038 NEXT;
6039
6040 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6041 if (op == NULL) {
6042 if ((last != NULL) && (last != ret))
6043 xmlFreeDocElementContent(ctxt->myDoc, last);
6044 xmlFreeDocElementContent(ctxt->myDoc, ret);
6045 return(NULL);
6046 }
6047 if (last == NULL) {
6048 op->c1 = ret;
6049 if (ret != NULL)
6050 ret->parent = op;
6051 ret = cur = op;
6052 } else {
6053 cur->c2 = op;
6054 if (op != NULL)
6055 op->parent = cur;
6056 op->c1 = last;
6057 if (last != NULL)
6058 last->parent = op;
6059 cur =op;
6060 last = NULL;
6061 }
6062 } else if (RAW == '|') {
6063 if (type == 0) type = CUR;
6064
6065 /*
6066 * Detect "Name , Name | Name" error
6067 */
6068 else if (type != CUR) {
6069 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6070 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6071 type);
6072 if ((last != NULL) && (last != ret))
6073 xmlFreeDocElementContent(ctxt->myDoc, last);
6074 if (ret != NULL)
6075 xmlFreeDocElementContent(ctxt->myDoc, ret);
6076 return(NULL);
6077 }
6078 NEXT;
6079
6080 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6081 if (op == NULL) {
6082 if ((last != NULL) && (last != ret))
6083 xmlFreeDocElementContent(ctxt->myDoc, last);
6084 if (ret != NULL)
6085 xmlFreeDocElementContent(ctxt->myDoc, ret);
6086 return(NULL);
6087 }
6088 if (last == NULL) {
6089 op->c1 = ret;
6090 if (ret != NULL)
6091 ret->parent = op;
6092 ret = cur = op;
6093 } else {
6094 cur->c2 = op;
6095 if (op != NULL)
6096 op->parent = cur;
6097 op->c1 = last;
6098 if (last != NULL)
6099 last->parent = op;
6100 cur =op;
6101 last = NULL;
6102 }
6103 } else {
6104 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6105 if ((last != NULL) && (last != ret))
6106 xmlFreeDocElementContent(ctxt->myDoc, last);
6107 if (ret != NULL)
6108 xmlFreeDocElementContent(ctxt->myDoc, ret);
6109 return(NULL);
6110 }
6111 GROW;
6112 SKIP_BLANKS;
6113 GROW;
6114 if (RAW == '(') {
6115 int inputid = ctxt->input->id;
6116 /* Recurse on second child */
6117 NEXT;
6118 SKIP_BLANKS;
6119 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6120 depth + 1);
6121 SKIP_BLANKS;
6122 } else {
6123 elem = xmlParseName(ctxt);
6124 if (elem == NULL) {
6125 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6126 if (ret != NULL)
6127 xmlFreeDocElementContent(ctxt->myDoc, ret);
6128 return(NULL);
6129 }
6130 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6131 if (last == NULL) {
6132 if (ret != NULL)
6133 xmlFreeDocElementContent(ctxt->myDoc, ret);
6134 return(NULL);
6135 }
6136 if (RAW == '?') {
6137 last->ocur = XML_ELEMENT_CONTENT_OPT;
6138 NEXT;
6139 } else if (RAW == '*') {
6140 last->ocur = XML_ELEMENT_CONTENT_MULT;
6141 NEXT;
6142 } else if (RAW == '+') {
6143 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6144 NEXT;
6145 } else {
6146 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6147 }
6148 }
6149 SKIP_BLANKS;
6150 GROW;
6151 }
6152 if ((cur != NULL) && (last != NULL)) {
6153 cur->c2 = last;
6154 if (last != NULL)
6155 last->parent = cur;
6156 }
6157 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6158 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159 "Element content declaration doesn't start and stop in the same entity\n",
6160 NULL, NULL);
6161 }
6162 NEXT;
6163 if (RAW == '?') {
6164 if (ret != NULL) {
6165 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168 else
6169 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6170 }
6171 NEXT;
6172 } else if (RAW == '*') {
6173 if (ret != NULL) {
6174 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6175 cur = ret;
6176 /*
6177 * Some normalization:
6178 * (a | b* | c?)* == (a | b | c)*
6179 */
6180 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6181 if ((cur->c1 != NULL) &&
6182 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185 if ((cur->c2 != NULL) &&
6186 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6189 cur = cur->c2;
6190 }
6191 }
6192 NEXT;
6193 } else if (RAW == '+') {
6194 if (ret != NULL) {
6195 int found = 0;
6196
6197 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6200 else
6201 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6202 /*
6203 * Some normalization:
6204 * (a | b*)+ == (a | b)*
6205 * (a | b?)+ == (a | b)*
6206 */
6207 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6208 if ((cur->c1 != NULL) &&
6209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 found = 1;
6213 }
6214 if ((cur->c2 != NULL) &&
6215 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6218 found = 1;
6219 }
6220 cur = cur->c2;
6221 }
6222 if (found)
6223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6224 }
6225 NEXT;
6226 }
6227 return(ret);
6228 }
6229
6230 /**
6231 * xmlParseElementChildrenContentDecl:
6232 * @ctxt: an XML parser context
6233 * @inputchk: the input used for the current entity, needed for boundary checks
6234 *
6235 * parse the declaration for a Mixed Element content
6236 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6237 *
6238 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6239 *
6240 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6241 *
6242 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6243 *
6244 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6245 *
6246 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247 * TODO Parameter-entity replacement text must be properly nested
6248 * with parenthesized groups. That is to say, if either of the
6249 * opening or closing parentheses in a choice, seq, or Mixed
6250 * construct is contained in the replacement text for a parameter
6251 * entity, both must be contained in the same replacement text. For
6252 * interoperability, if a parameter-entity reference appears in a
6253 * choice, seq, or Mixed construct, its replacement text should not
6254 * be empty, and neither the first nor last non-blank character of
6255 * the replacement text should be a connector (| or ,).
6256 *
6257 * Returns the tree of xmlElementContentPtr describing the element
6258 * hierarchy.
6259 */
6260 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6261 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262 /* stub left for API/ABI compat */
6263 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6264 }
6265
6266 /**
6267 * xmlParseElementContentDecl:
6268 * @ctxt: an XML parser context
6269 * @name: the name of the element being defined.
6270 * @result: the Element Content pointer will be stored here if any
6271 *
6272 * parse the declaration for an Element content either Mixed or Children,
6273 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6274 *
6275 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6276 *
6277 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6278 */
6279
6280 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6281 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6282 xmlElementContentPtr *result) {
6283
6284 xmlElementContentPtr tree = NULL;
6285 int inputid = ctxt->input->id;
6286 int res;
6287
6288 *result = NULL;
6289
6290 if (RAW != '(') {
6291 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6292 "xmlParseElementContentDecl : %s '(' expected\n", name);
6293 return(-1);
6294 }
6295 NEXT;
6296 GROW;
6297 SKIP_BLANKS;
6298 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6299 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6300 res = XML_ELEMENT_TYPE_MIXED;
6301 } else {
6302 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6303 res = XML_ELEMENT_TYPE_ELEMENT;
6304 }
6305 SKIP_BLANKS;
6306 *result = tree;
6307 return(res);
6308 }
6309
6310 /**
6311 * xmlParseElementDecl:
6312 * @ctxt: an XML parser context
6313 *
6314 * parse an Element declaration.
6315 *
6316 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6317 *
6318 * [ VC: Unique Element Type Declaration ]
6319 * No element type may be declared more than once
6320 *
6321 * Returns the type of the element, or -1 in case of error
6322 */
6323 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6324 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6325 const xmlChar *name;
6326 int ret = -1;
6327 xmlElementContentPtr content = NULL;
6328
6329 /* GROW; done in the caller */
6330 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6331 xmlParserInputPtr input = ctxt->input;
6332
6333 SKIP(9);
6334 if (!IS_BLANK_CH(CUR)) {
6335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336 "Space required after 'ELEMENT'\n");
6337 }
6338 SKIP_BLANKS;
6339 name = xmlParseName(ctxt);
6340 if (name == NULL) {
6341 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342 "xmlParseElementDecl: no name for Element\n");
6343 return(-1);
6344 }
6345 while ((RAW == 0) && (ctxt->inputNr > 1))
6346 xmlPopInput(ctxt);
6347 if (!IS_BLANK_CH(CUR)) {
6348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349 "Space required after the element name\n");
6350 }
6351 SKIP_BLANKS;
6352 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6353 SKIP(5);
6354 /*
6355 * Element must always be empty.
6356 */
6357 ret = XML_ELEMENT_TYPE_EMPTY;
6358 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6359 (NXT(2) == 'Y')) {
6360 SKIP(3);
6361 /*
6362 * Element is a generic container.
6363 */
6364 ret = XML_ELEMENT_TYPE_ANY;
6365 } else if (RAW == '(') {
6366 ret = xmlParseElementContentDecl(ctxt, name, &content);
6367 } else {
6368 /*
6369 * [ WFC: PEs in Internal Subset ] error handling.
6370 */
6371 if ((RAW == '%') && (ctxt->external == 0) &&
6372 (ctxt->inputNr == 1)) {
6373 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6374 "PEReference: forbidden within markup decl in internal subset\n");
6375 } else {
6376 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6377 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6378 }
6379 return(-1);
6380 }
6381
6382 SKIP_BLANKS;
6383 /*
6384 * Pop-up of finished entities.
6385 */
6386 while ((RAW == 0) && (ctxt->inputNr > 1))
6387 xmlPopInput(ctxt);
6388 SKIP_BLANKS;
6389
6390 if (RAW != '>') {
6391 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6392 if (content != NULL) {
6393 xmlFreeDocElementContent(ctxt->myDoc, content);
6394 }
6395 } else {
6396 if (input != ctxt->input) {
6397 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398 "Element declaration doesn't start and stop in the same entity\n");
6399 }
6400
6401 NEXT;
6402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6403 (ctxt->sax->elementDecl != NULL)) {
6404 if (content != NULL)
6405 content->parent = NULL;
6406 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6407 content);
6408 if ((content != NULL) && (content->parent == NULL)) {
6409 /*
6410 * this is a trick: if xmlAddElementDecl is called,
6411 * instead of copying the full tree it is plugged directly
6412 * if called from the parser. Avoid duplicating the
6413 * interfaces or change the API/ABI
6414 */
6415 xmlFreeDocElementContent(ctxt->myDoc, content);
6416 }
6417 } else if (content != NULL) {
6418 xmlFreeDocElementContent(ctxt->myDoc, content);
6419 }
6420 }
6421 }
6422 return(ret);
6423 }
6424
6425 /**
6426 * xmlParseConditionalSections
6427 * @ctxt: an XML parser context
6428 *
6429 * [61] conditionalSect ::= includeSect | ignoreSect
6430 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6434 */
6435
6436 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6437 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6438 int id = ctxt->input->id;
6439
6440 SKIP(3);
6441 SKIP_BLANKS;
6442 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6443 SKIP(7);
6444 SKIP_BLANKS;
6445 if (RAW != '[') {
6446 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6447 } else {
6448 if (ctxt->input->id != id) {
6449 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450 "All markup of the conditional section is not in the same entity\n",
6451 NULL, NULL);
6452 }
6453 NEXT;
6454 }
6455 if (xmlParserDebugEntities) {
6456 if ((ctxt->input != NULL) && (ctxt->input->filename))
6457 xmlGenericError(xmlGenericErrorContext,
6458 "%s(%d): ", ctxt->input->filename,
6459 ctxt->input->line);
6460 xmlGenericError(xmlGenericErrorContext,
6461 "Entering INCLUDE Conditional Section\n");
6462 }
6463
6464 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6465 (NXT(2) != '>'))) {
6466 const xmlChar *check = CUR_PTR;
6467 unsigned int cons = ctxt->input->consumed;
6468
6469 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470 xmlParseConditionalSections(ctxt);
6471 } else if (IS_BLANK_CH(CUR)) {
6472 NEXT;
6473 } else if (RAW == '%') {
6474 xmlParsePEReference(ctxt);
6475 } else
6476 xmlParseMarkupDecl(ctxt);
6477
6478 /*
6479 * Pop-up of finished entities.
6480 */
6481 while ((RAW == 0) && (ctxt->inputNr > 1))
6482 xmlPopInput(ctxt);
6483
6484 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6485 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6486 break;
6487 }
6488 }
6489 if (xmlParserDebugEntities) {
6490 if ((ctxt->input != NULL) && (ctxt->input->filename))
6491 xmlGenericError(xmlGenericErrorContext,
6492 "%s(%d): ", ctxt->input->filename,
6493 ctxt->input->line);
6494 xmlGenericError(xmlGenericErrorContext,
6495 "Leaving INCLUDE Conditional Section\n");
6496 }
6497
6498 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6499 int state;
6500 xmlParserInputState instate;
6501 int depth = 0;
6502
6503 SKIP(6);
6504 SKIP_BLANKS;
6505 if (RAW != '[') {
6506 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6507 } else {
6508 if (ctxt->input->id != id) {
6509 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510 "All markup of the conditional section is not in the same entity\n",
6511 NULL, NULL);
6512 }
6513 NEXT;
6514 }
6515 if (xmlParserDebugEntities) {
6516 if ((ctxt->input != NULL) && (ctxt->input->filename))
6517 xmlGenericError(xmlGenericErrorContext,
6518 "%s(%d): ", ctxt->input->filename,
6519 ctxt->input->line);
6520 xmlGenericError(xmlGenericErrorContext,
6521 "Entering IGNORE Conditional Section\n");
6522 }
6523
6524 /*
6525 * Parse up to the end of the conditional section
6526 * But disable SAX event generating DTD building in the meantime
6527 */
6528 state = ctxt->disableSAX;
6529 instate = ctxt->instate;
6530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6531 ctxt->instate = XML_PARSER_IGNORE;
6532
6533 while ((depth >= 0) && (RAW != 0)) {
6534 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535 depth++;
6536 SKIP(3);
6537 continue;
6538 }
6539 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540 if (--depth >= 0) SKIP(3);
6541 continue;
6542 }
6543 NEXT;
6544 continue;
6545 }
6546
6547 ctxt->disableSAX = state;
6548 ctxt->instate = instate;
6549
6550 if (xmlParserDebugEntities) {
6551 if ((ctxt->input != NULL) && (ctxt->input->filename))
6552 xmlGenericError(xmlGenericErrorContext,
6553 "%s(%d): ", ctxt->input->filename,
6554 ctxt->input->line);
6555 xmlGenericError(xmlGenericErrorContext,
6556 "Leaving IGNORE Conditional Section\n");
6557 }
6558
6559 } else {
6560 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6561 }
6562
6563 if (RAW == 0)
6564 SHRINK;
6565
6566 if (RAW == 0) {
6567 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6568 } else {
6569 if (ctxt->input->id != id) {
6570 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571 "All markup of the conditional section is not in the same entity\n",
6572 NULL, NULL);
6573 }
6574 SKIP(3);
6575 }
6576 }
6577
6578 /**
6579 * xmlParseMarkupDecl:
6580 * @ctxt: an XML parser context
6581 *
6582 * parse Markup declarations
6583 *
6584 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585 * NotationDecl | PI | Comment
6586 *
6587 * [ VC: Proper Declaration/PE Nesting ]
6588 * Parameter-entity replacement text must be properly nested with
6589 * markup declarations. That is to say, if either the first character
6590 * or the last character of a markup declaration (markupdecl above) is
6591 * contained in the replacement text for a parameter-entity reference,
6592 * both must be contained in the same replacement text.
6593 *
6594 * [ WFC: PEs in Internal Subset ]
6595 * In the internal DTD subset, parameter-entity references can occur
6596 * only where markup declarations can occur, not within markup declarations.
6597 * (This does not apply to references that occur in external parameter
6598 * entities or to the external subset.)
6599 */
6600 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6601 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6602 GROW;
6603 if (CUR == '<') {
6604 if (NXT(1) == '!') {
6605 switch (NXT(2)) {
6606 case 'E':
6607 if (NXT(3) == 'L')
6608 xmlParseElementDecl(ctxt);
6609 else if (NXT(3) == 'N')
6610 xmlParseEntityDecl(ctxt);
6611 break;
6612 case 'A':
6613 xmlParseAttributeListDecl(ctxt);
6614 break;
6615 case 'N':
6616 xmlParseNotationDecl(ctxt);
6617 break;
6618 case '-':
6619 xmlParseComment(ctxt);
6620 break;
6621 default:
6622 /* there is an error but it will be detected later */
6623 break;
6624 }
6625 } else if (NXT(1) == '?') {
6626 xmlParsePI(ctxt);
6627 }
6628 }
6629 /*
6630 * This is only for internal subset. On external entities,
6631 * the replacement is done before parsing stage
6632 */
6633 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634 xmlParsePEReference(ctxt);
6635
6636 /*
6637 * Conditional sections are allowed from entities included
6638 * by PE References in the internal subset.
6639 */
6640 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642 xmlParseConditionalSections(ctxt);
6643 }
6644 }
6645
6646 ctxt->instate = XML_PARSER_DTD;
6647 }
6648
6649 /**
6650 * xmlParseTextDecl:
6651 * @ctxt: an XML parser context
6652 *
6653 * parse an XML declaration header for external entities
6654 *
6655 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6656 */
6657
6658 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6659 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6660 xmlChar *version;
6661 const xmlChar *encoding;
6662
6663 /*
6664 * We know that '<?xml' is here.
6665 */
6666 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6667 SKIP(5);
6668 } else {
6669 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6670 return;
6671 }
6672
6673 if (!IS_BLANK_CH(CUR)) {
6674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675 "Space needed after '<?xml'\n");
6676 }
6677 SKIP_BLANKS;
6678
6679 /*
6680 * We may have the VersionInfo here.
6681 */
6682 version = xmlParseVersionInfo(ctxt);
6683 if (version == NULL)
6684 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6685 else {
6686 if (!IS_BLANK_CH(CUR)) {
6687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space needed here\n");
6689 }
6690 }
6691 ctxt->input->version = version;
6692
6693 /*
6694 * We must have the encoding declaration
6695 */
6696 encoding = xmlParseEncodingDecl(ctxt);
6697 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6698 /*
6699 * The XML REC instructs us to stop parsing right here
6700 */
6701 return;
6702 }
6703 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705 "Missing encoding in text declaration\n");
6706 }
6707
6708 SKIP_BLANKS;
6709 if ((RAW == '?') && (NXT(1) == '>')) {
6710 SKIP(2);
6711 } else if (RAW == '>') {
6712 /* Deprecated old WD ... */
6713 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6714 NEXT;
6715 } else {
6716 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6717 MOVETO_ENDTAG(CUR_PTR);
6718 NEXT;
6719 }
6720 }
6721
6722 /**
6723 * xmlParseExternalSubset:
6724 * @ctxt: an XML parser context
6725 * @ExternalID: the external identifier
6726 * @SystemID: the system identifier (or URL)
6727 *
6728 * parse Markup declarations from an external subset
6729 *
6730 * [30] extSubset ::= textDecl? extSubsetDecl
6731 *
6732 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6733 */
6734 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6735 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736 const xmlChar *SystemID) {
6737 xmlDetectSAX2(ctxt);
6738 GROW;
6739
6740 if ((ctxt->encoding == NULL) &&
6741 (ctxt->input->end - ctxt->input->cur >= 4)) {
6742 xmlChar start[4];
6743 xmlCharEncoding enc;
6744
6745 start[0] = RAW;
6746 start[1] = NXT(1);
6747 start[2] = NXT(2);
6748 start[3] = NXT(3);
6749 enc = xmlDetectCharEncoding(start, 4);
6750 if (enc != XML_CHAR_ENCODING_NONE)
6751 xmlSwitchEncoding(ctxt, enc);
6752 }
6753
6754 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6755 xmlParseTextDecl(ctxt);
6756 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6757 /*
6758 * The XML REC instructs us to stop parsing right here
6759 */
6760 ctxt->instate = XML_PARSER_EOF;
6761 return;
6762 }
6763 }
6764 if (ctxt->myDoc == NULL) {
6765 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6766 if (ctxt->myDoc == NULL) {
6767 xmlErrMemory(ctxt, "New Doc failed");
6768 return;
6769 }
6770 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6771 }
6772 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6774
6775 ctxt->instate = XML_PARSER_DTD;
6776 ctxt->external = 1;
6777 while (((RAW == '<') && (NXT(1) == '?')) ||
6778 ((RAW == '<') && (NXT(1) == '!')) ||
6779 (RAW == '%') || IS_BLANK_CH(CUR)) {
6780 const xmlChar *check = CUR_PTR;
6781 unsigned int cons = ctxt->input->consumed;
6782
6783 GROW;
6784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785 xmlParseConditionalSections(ctxt);
6786 } else if (IS_BLANK_CH(CUR)) {
6787 NEXT;
6788 } else if (RAW == '%') {
6789 xmlParsePEReference(ctxt);
6790 } else
6791 xmlParseMarkupDecl(ctxt);
6792
6793 /*
6794 * Pop-up of finished entities.
6795 */
6796 while ((RAW == 0) && (ctxt->inputNr > 1))
6797 xmlPopInput(ctxt);
6798
6799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6800 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6801 break;
6802 }
6803 }
6804
6805 if (RAW != 0) {
6806 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6807 }
6808
6809 }
6810
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse and handle a reference found in content. Nothing is done unless
 * the current character is '&'.
 *
 * Depending on the kind of reference and on the parser settings this
 * ends up in:
 *  - a characters() callback for a CharRef or a predefined entity;
 *  - a reference() callback when entities are not substituted, or for a
 *    CharRef above 0xFF when the context charset is not UTF-8;
 *  - the entity content being parsed (on first reference) and then
 *    linked or copied under ctxt->node when entities are substituted.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;          /* ent->checked as found before this call */
    xmlNodePtr list = NULL;   /* nodes built when parsing the entity content */
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[10];
        /* remember whether the reference was written in hexadecimal */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        /* xmlParseCharRef() returned 0: nothing to deliver */
        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* rebuild the reference text, keeping the original radix */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    /* do not process entity content once the document is not well formed */
    if (!ctxt->wellFormed)
        return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     */
    if (ent->checked == 0) {
        /* used to count the entity references met while parsing the content */
        unsigned long oldnbent = ctxt->nbentities;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                                user_data, ctxt->depth, ent->URI,
                                                ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings
         */
        ent->checked = ctxt->nbentities - oldnbent;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            /*
             * Attach the freshly built nodes to the entity, unless it
             * already has children in which case they are discarded.
             */
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
                 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                if (ctxt->replaceEntities) {
                    /*
                     * Prune it directly in the generated document
                     * except for single text nodes.
                     */
                    if (((list->type == XML_TEXT_NODE) &&
                         (list->next == NULL)) ||
                        (ctxt->parseMode == XML_PARSE_READER)) {
                        /* the entity keeps the nodes; list is cleared */
                        list->parent = (xmlNodePtr) ent;
                        list = NULL;
                        ent->owner = 1;
                    } else {
                        /*
                         * The nodes are reparented to the current node
                         * and the entity is flagged as not owning them.
                         */
                        ent->owner = 0;
                        while (list != NULL) {
                            list->parent = (xmlNodePtr) ctxt->node;
                            list->doc = ctxt->myDoc;
                            if (list->next == NULL)
                                ent->last = list;
                            list = list->next;
                        }
                        /* the loop consumed list, point it back to the head */
                        list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                        if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                            xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                    }
                } else {
                    /* no substitution: the entity owns its subtree */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                              "Entity '%s' failed to parse\n", ent->name);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        /*
         * The content held no entity reference: store 1 so that the
         * entity is not taken as unchecked on the next reference.
         */
        if (ent->checked == 0)
            ent->checked = 1;
    } else if (ent->checked != 1) {
        /*
         * Entity already checked: add the count recorded on its first
         * parsing to the number of entity references of this context.
         */
        ctxt->nbentities += ent->checked;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * though parsing for first checking go though the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            /* same parsing as above, but no node list is requested */
            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * From here on the entity has children. When entities are not
     * substituted, just report the reference to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             * parseMode == XML_PARSE_READER means we are operating on
             * the Reader and since nodes are discarded we must copy
             * all the time.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                        ent->owner = 1;
                 */

                /*
                 * Append a copy of each entity child, up to and
                 * including ent->last, under the current node.
                 */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                    xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if (list == NULL) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;
                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* detach the original node before moving it */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* the copy goes to the entity, the original to the tree */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                    xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}
7216
7217 /**
7218 * xmlParseEntityRef:
7219 * @ctxt: an XML parser context
7220 *
7221 * parse ENTITY references declarations
7222 *
7223 * [68] EntityRef ::= '&' Name ';'
7224 *
7225 * [ WFC: Entity Declared ]
7226 * In a document without any DTD, a document with only an internal DTD
7227 * subset which contains no parameter entity references, or a document
7228 * with "standalone='yes'", the Name given in the entity reference
7229 * must match that in an entity declaration, except that well-formed
7230 * documents need not declare any of the following entities: amp, lt,
7231 * gt, apos, quot. The declaration of a parameter entity must precede
7232 * any reference to it. Similarly, the declaration of a general entity
7233 * must precede any reference to it which appears in a default value in an
7234 * attribute-list declaration. Note that if entities are declared in the
7235 * external subset or in external parameter entities, a non-validating
7236 * processor is not obligated to read and process their declarations;
7237 * for such documents, the rule that an entity must be declared is a
7238 * well-formedness constraint only if standalone='yes'.
7239 *
7240 * [ WFC: Parsed Entity ]
7241 * An entity reference must not contain the name of an unparsed entity
7242 *
7243 * Returns the xmlEntityPtr if found, or NULL otherwise.
7244 */
7245 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7246 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7247 const xmlChar *name;
7248 xmlEntityPtr ent = NULL;
7249
7250 GROW;
7251
7252 if (RAW != '&')
7253 return(NULL);
7254 NEXT;
7255 name = xmlParseName(ctxt);
7256 if (name == NULL) {
7257 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7258 "xmlParseEntityRef: no name\n");
7259 return(NULL);
7260 }
7261 if (RAW != ';') {
7262 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7263 return(NULL);
7264 }
7265 NEXT;
7266
7267 /*
7268 * Predefined entites override any extra definition
7269 */
7270 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7271 ent = xmlGetPredefinedEntity(name);
7272 if (ent != NULL)
7273 return(ent);
7274 }
7275
7276 /*
7277 * Increate the number of entity references parsed
7278 */
7279 ctxt->nbentities++;
7280
7281 /*
7282 * Ask first SAX for entity resolution, otherwise try the
7283 * entities which may have stored in the parser context.
7284 */
7285 if (ctxt->sax != NULL) {
7286 if (ctxt->sax->getEntity != NULL)
7287 ent = ctxt->sax->getEntity(ctxt->userData, name);
7288 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7289 (ctxt->options & XML_PARSE_OLDSAX))
7290 ent = xmlGetPredefinedEntity(name);
7291 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7292 (ctxt->userData==ctxt)) {
7293 ent = xmlSAX2GetEntity(ctxt, name);
7294 }
7295 }
7296 /*
7297 * [ WFC: Entity Declared ]
7298 * In a document without any DTD, a document with only an
7299 * internal DTD subset which contains no parameter entity
7300 * references, or a document with "standalone='yes'", the
7301 * Name given in the entity reference must match that in an
7302 * entity declaration, except that well-formed documents
7303 * need not declare any of the following entities: amp, lt,
7304 * gt, apos, quot.
7305 * The declaration of a parameter entity must precede any
7306 * reference to it.
7307 * Similarly, the declaration of a general entity must
7308 * precede any reference to it which appears in a default
7309 * value in an attribute-list declaration. Note that if
7310 * entities are declared in the external subset or in
7311 * external parameter entities, a non-validating processor
7312 * is not obligated to read and process their declarations;
7313 * for such documents, the rule that an entity must be
7314 * declared is a well-formedness constraint only if
7315 * standalone='yes'.
7316 */
7317 if (ent == NULL) {
7318 if ((ctxt->standalone == 1) ||
7319 ((ctxt->hasExternalSubset == 0) &&
7320 (ctxt->hasPErefs == 0))) {
7321 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7322 "Entity '%s' not defined\n", name);
7323 } else {
7324 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7325 "Entity '%s' not defined\n", name);
7326 if ((ctxt->inSubset == 0) &&
7327 (ctxt->sax != NULL) &&
7328 (ctxt->sax->reference != NULL)) {
7329 ctxt->sax->reference(ctxt->userData, name);
7330 }
7331 }
7332 ctxt->valid = 0;
7333 }
7334
7335 /*
7336 * [ WFC: Parsed Entity ]
7337 * An entity reference must not contain the name of an
7338 * unparsed entity
7339 */
7340 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7341 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7342 "Entity reference to unparsed entity %s\n", name);
7343 }
7344
7345 /*
7346 * [ WFC: No External Entity References ]
7347 * Attribute values cannot contain direct or indirect
7348 * entity references to external entities.
7349 */
7350 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7351 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7352 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7353 "Attribute references external entity '%s'\n", name);
7354 }
7355 /*
7356 * [ WFC: No < in Attribute Values ]
7357 * The replacement text of any entity referred to directly or
7358 * indirectly in an attribute value (other than "<") must
7359 * not contain a <.
7360 */
7361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7362 (ent != NULL) && (ent->content != NULL) &&
7363 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7364 (xmlStrchr(ent->content, '<'))) {
7365 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7366 "'<' in entity '%s' is not allowed in attributes values\n", name);
7367 }
7368
7369 /*
7370 * Internal check, no parameter entities here ...
7371 */
7372 else {
7373 switch (ent->etype) {
7374 case XML_INTERNAL_PARAMETER_ENTITY:
7375 case XML_EXTERNAL_PARAMETER_ENTITY:
7376 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7377 "Attempt to reference the parameter entity '%s'\n",
7378 name);
7379 break;
7380 default:
7381 break;
7382 }
7383 }
7384
7385 /*
7386 * [ WFC: No Recursion ]
7387 * A parsed entity must not contain a recursive reference
7388 * to itself, either directly or indirectly.
7389 * Done somewhere else
7390 */
7391 return(ent);
7392 }
7393
7394 /**
7395 * xmlParseStringEntityRef:
7396 * @ctxt: an XML parser context
7397 * @str: a pointer to an index in the string
7398 *
7399 * parse ENTITY references declarations, but this version parses it from
7400 * a string value.
7401 *
7402 * [68] EntityRef ::= '&' Name ';'
7403 *
7404 * [ WFC: Entity Declared ]
7405 * In a document without any DTD, a document with only an internal DTD
7406 * subset which contains no parameter entity references, or a document
7407 * with "standalone='yes'", the Name given in the entity reference
7408 * must match that in an entity declaration, except that well-formed
7409 * documents need not declare any of the following entities: amp, lt,
7410 * gt, apos, quot. The declaration of a parameter entity must precede
7411 * any reference to it. Similarly, the declaration of a general entity
7412 * must precede any reference to it which appears in a default value in an
7413 * attribute-list declaration. Note that if entities are declared in the
7414 * external subset or in external parameter entities, a non-validating
7415 * processor is not obligated to read and process their declarations;
7416 * for such documents, the rule that an entity must be declared is a
7417 * well-formedness constraint only if standalone='yes'.
7418 *
7419 * [ WFC: Parsed Entity ]
7420 * An entity reference must not contain the name of an unparsed entity
7421 *
7422 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7423 * is updated to the current location in the string.
7424 */
7425 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7426 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7427 xmlChar *name;
7428 const xmlChar *ptr;
7429 xmlChar cur;
7430 xmlEntityPtr ent = NULL;
7431
7432 if ((str == NULL) || (*str == NULL))
7433 return(NULL);
7434 ptr = *str;
7435 cur = *ptr;
7436 if (cur != '&')
7437 return(NULL);
7438
7439 ptr++;
7440 name = xmlParseStringName(ctxt, &ptr);
7441 if (name == NULL) {
7442 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7443 "xmlParseStringEntityRef: no name\n");
7444 *str = ptr;
7445 return(NULL);
7446 }
7447 if (*ptr != ';') {
7448 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7449 xmlFree(name);
7450 *str = ptr;
7451 return(NULL);
7452 }
7453 ptr++;
7454
7455
7456 /*
7457 * Predefined entites override any extra definition
7458 */
7459 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7460 ent = xmlGetPredefinedEntity(name);
7461 if (ent != NULL) {
7462 xmlFree(name);
7463 *str = ptr;
7464 return(ent);
7465 }
7466 }
7467
7468 /*
7469 * Increate the number of entity references parsed
7470 */
7471 ctxt->nbentities++;
7472
7473 /*
7474 * Ask first SAX for entity resolution, otherwise try the
7475 * entities which may have stored in the parser context.
7476 */
7477 if (ctxt->sax != NULL) {
7478 if (ctxt->sax->getEntity != NULL)
7479 ent = ctxt->sax->getEntity(ctxt->userData, name);
7480 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7481 ent = xmlGetPredefinedEntity(name);
7482 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7483 ent = xmlSAX2GetEntity(ctxt, name);
7484 }
7485 }
7486
7487 /*
7488 * [ WFC: Entity Declared ]
7489 * In a document without any DTD, a document with only an
7490 * internal DTD subset which contains no parameter entity
7491 * references, or a document with "standalone='yes'", the
7492 * Name given in the entity reference must match that in an
7493 * entity declaration, except that well-formed documents
7494 * need not declare any of the following entities: amp, lt,
7495 * gt, apos, quot.
7496 * The declaration of a parameter entity must precede any
7497 * reference to it.
7498 * Similarly, the declaration of a general entity must
7499 * precede any reference to it which appears in a default
7500 * value in an attribute-list declaration. Note that if
7501 * entities are declared in the external subset or in
7502 * external parameter entities, a non-validating processor
7503 * is not obligated to read and process their declarations;
7504 * for such documents, the rule that an entity must be
7505 * declared is a well-formedness constraint only if
7506 * standalone='yes'.
7507 */
7508 if (ent == NULL) {
7509 if ((ctxt->standalone == 1) ||
7510 ((ctxt->hasExternalSubset == 0) &&
7511 (ctxt->hasPErefs == 0))) {
7512 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7513 "Entity '%s' not defined\n", name);
7514 } else {
7515 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7516 "Entity '%s' not defined\n",
7517 name);
7518 }
7519 /* TODO ? check regressions ctxt->valid = 0; */
7520 }
7521
7522 /*
7523 * [ WFC: Parsed Entity ]
7524 * An entity reference must not contain the name of an
7525 * unparsed entity
7526 */
7527 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7528 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7529 "Entity reference to unparsed entity %s\n", name);
7530 }
7531
7532 /*
7533 * [ WFC: No External Entity References ]
7534 * Attribute values cannot contain direct or indirect
7535 * entity references to external entities.
7536 */
7537 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7538 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7539 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7540 "Attribute references external entity '%s'\n", name);
7541 }
7542 /*
7543 * [ WFC: No < in Attribute Values ]
7544 * The replacement text of any entity referred to directly or
7545 * indirectly in an attribute value (other than "<") must
7546 * not contain a <.
7547 */
7548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7549 (ent != NULL) && (ent->content != NULL) &&
7550 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7551 (xmlStrchr(ent->content, '<'))) {
7552 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7553 "'<' in entity '%s' is not allowed in attributes values\n",
7554 name);
7555 }
7556
7557 /*
7558 * Internal check, no parameter entities here ...
7559 */
7560 else {
7561 switch (ent->etype) {
7562 case XML_INTERNAL_PARAMETER_ENTITY:
7563 case XML_EXTERNAL_PARAMETER_ENTITY:
7564 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7565 "Attempt to reference the parameter entity '%s'\n",
7566 name);
7567 break;
7568 default:
7569 break;
7570 }
7571 }
7572
7573 /*
7574 * [ WFC: No Recursion ]
7575 * A parsed entity must not contain a recursive reference
7576 * to itself, either directly or indirectly.
7577 * Done somewhere else
7578 */
7579
7580 xmlFree(name);
7581 *str = ptr;
7582 return(ent);
7583 }
7584
7585 /**
7586 * xmlParsePEReference:
7587 * @ctxt: an XML parser context
7588 *
7589 * parse PEReference declarations
7590 * The entity content is handled directly by pushing it's content as
7591 * a new input stream.
7592 *
7593 * [69] PEReference ::= '%' Name ';'
7594 *
7595 * [ WFC: No Recursion ]
7596 * A parsed entity must not contain a recursive
7597 * reference to itself, either directly or indirectly.
7598 *
7599 * [ WFC: Entity Declared ]
7600 * In a document without any DTD, a document with only an internal DTD
7601 * subset which contains no parameter entity references, or a document
7602 * with "standalone='yes'", ... ... The declaration of a parameter
7603 * entity must precede any reference to it...
7604 *
7605 * [ VC: Entity Declared ]
7606 * In a document with an external subset or external parameter entities
7607 * with "standalone='no'", ... ... The declaration of a parameter entity
7608 * must precede any reference to it...
7609 *
7610 * [ WFC: In DTD ]
7611 * Parameter-entity references may only appear in the DTD.
7612 * NOTE: misleading but this is handled.
7613 */
7614 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7615 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7616 {
7617 const xmlChar *name;
7618 xmlEntityPtr entity = NULL;
7619 xmlParserInputPtr input;
7620
7621 if (RAW != '%')
7622 return;
7623 NEXT;
7624 name = xmlParseName(ctxt);
7625 if (name == NULL) {
7626 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7627 "xmlParsePEReference: no name\n");
7628 return;
7629 }
7630 if (RAW != ';') {
7631 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7632 return;
7633 }
7634
7635 NEXT;
7636
7637 /*
7638 * Increate the number of entity references parsed
7639 */
7640 ctxt->nbentities++;
7641
7642 /*
7643 * Request the entity from SAX
7644 */
7645 if ((ctxt->sax != NULL) &&
7646 (ctxt->sax->getParameterEntity != NULL))
7647 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7648 name);
7649 if (entity == NULL) {
7650 /*
7651 * [ WFC: Entity Declared ]
7652 * In a document without any DTD, a document with only an
7653 * internal DTD subset which contains no parameter entity
7654 * references, or a document with "standalone='yes'", ...
7655 * ... The declaration of a parameter entity must precede
7656 * any reference to it...
7657 */
7658 if ((ctxt->standalone == 1) ||
7659 ((ctxt->hasExternalSubset == 0) &&
7660 (ctxt->hasPErefs == 0))) {
7661 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7662 "PEReference: %%%s; not found\n",
7663 name);
7664 } else {
7665 /*
7666 * [ VC: Entity Declared ]
7667 * In a document with an external subset or external
7668 * parameter entities with "standalone='no'", ...
7669 * ... The declaration of a parameter entity must
7670 * precede any reference to it...
7671 */
7672 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7673 "PEReference: %%%s; not found\n",
7674 name, NULL);
7675 ctxt->valid = 0;
7676 }
7677 } else {
7678 /*
7679 * Internal checking in case the entity quest barfed
7680 */
7681 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7682 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 "Internal: %%%s; is not a parameter entity\n",
7685 name, NULL);
7686 } else if (ctxt->input->free != deallocblankswrapper) {
7687 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7688 if (xmlPushInput(ctxt, input) < 0)
7689 return;
7690 } else {
7691 /*
7692 * TODO !!!
7693 * handle the extra spaces added before and after
7694 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7695 */
7696 input = xmlNewEntityInputStream(ctxt, entity);
7697 if (xmlPushInput(ctxt, input) < 0)
7698 return;
7699 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7700 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7701 (IS_BLANK_CH(NXT(5)))) {
7702 xmlParseTextDecl(ctxt);
7703 if (ctxt->errNo ==
7704 XML_ERR_UNSUPPORTED_ENCODING) {
7705 /*
7706 * The XML REC instructs us to stop parsing
7707 * right here
7708 */
7709 ctxt->instate = XML_PARSER_EOF;
7710 return;
7711 }
7712 }
7713 }
7714 }
7715 ctxt->hasPErefs = 1;
7716 }
7717
7718 /**
7719 * xmlLoadEntityContent:
7720 * @ctxt: an XML parser context
7721 * @entity: an unloaded system entity
7722 *
7723 * Load the original content of the given system entity from the
7724 * ExternalID/SystemID given. This is to be used for Included in Literal
7725 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7726 *
7727 * Returns 0 in case of success and -1 in case of failure
7728 */
7729 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7730 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7731 xmlParserInputPtr input;
7732 xmlBufferPtr buf;
7733 int l, c;
7734 int count = 0;
7735
7736 if ((ctxt == NULL) || (entity == NULL) ||
7737 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7738 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7739 (entity->content != NULL)) {
7740 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7741 "xmlLoadEntityContent parameter error");
7742 return(-1);
7743 }
7744
7745 if (xmlParserDebugEntities)
7746 xmlGenericError(xmlGenericErrorContext,
7747 "Reading %s entity content input\n", entity->name);
7748
7749 buf = xmlBufferCreate();
7750 if (buf == NULL) {
7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 "xmlLoadEntityContent parameter error");
7753 return(-1);
7754 }
7755
7756 input = xmlNewEntityInputStream(ctxt, entity);
7757 if (input == NULL) {
7758 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7759 "xmlLoadEntityContent input error");
7760 xmlBufferFree(buf);
7761 return(-1);
7762 }
7763
7764 /*
7765 * Push the entity as the current input, read char by char
7766 * saving to the buffer until the end of the entity or an error
7767 */
7768 if (xmlPushInput(ctxt, input) < 0) {
7769 xmlBufferFree(buf);
7770 return(-1);
7771 }
7772
7773 GROW;
7774 c = CUR_CHAR(l);
7775 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7776 (IS_CHAR(c))) {
7777 xmlBufferAdd(buf, ctxt->input->cur, l);
7778 if (count++ > 100) {
7779 count = 0;
7780 GROW;
7781 }
7782 NEXTL(l);
7783 c = CUR_CHAR(l);
7784 }
7785
7786 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7787 xmlPopInput(ctxt);
7788 } else if (!IS_CHAR(c)) {
7789 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7790 "xmlLoadEntityContent: invalid char value %d\n",
7791 c);
7792 xmlBufferFree(buf);
7793 return(-1);
7794 }
7795 entity->content = buf->content;
7796 buf->content = NULL;
7797 xmlBufferFree(buf);
7798
7799 return(0);
7800 }
7801
7802 /**
7803 * xmlParseStringPEReference:
7804 * @ctxt: an XML parser context
7805 * @str: a pointer to an index in the string
7806 *
7807 * parse PEReference declarations
7808 *
7809 * [69] PEReference ::= '%' Name ';'
7810 *
7811 * [ WFC: No Recursion ]
7812 * A parsed entity must not contain a recursive
7813 * reference to itself, either directly or indirectly.
7814 *
7815 * [ WFC: Entity Declared ]
7816 * In a document without any DTD, a document with only an internal DTD
7817 * subset which contains no parameter entity references, or a document
7818 * with "standalone='yes'", ... ... The declaration of a parameter
7819 * entity must precede any reference to it...
7820 *
7821 * [ VC: Entity Declared ]
7822 * In a document with an external subset or external parameter entities
7823 * with "standalone='no'", ... ... The declaration of a parameter entity
7824 * must precede any reference to it...
7825 *
7826 * [ WFC: In DTD ]
7827 * Parameter-entity references may only appear in the DTD.
7828 * NOTE: misleading but this is handled.
7829 *
7830 * Returns the string of the entity content.
7831 * str is updated to the current value of the index
7832 */
7833 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7834 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7835 const xmlChar *ptr;
7836 xmlChar cur;
7837 xmlChar *name;
7838 xmlEntityPtr entity = NULL;
7839
7840 if ((str == NULL) || (*str == NULL)) return(NULL);
7841 ptr = *str;
7842 cur = *ptr;
7843 if (cur != '%')
7844 return(NULL);
7845 ptr++;
7846 name = xmlParseStringName(ctxt, &ptr);
7847 if (name == NULL) {
7848 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7849 "xmlParseStringPEReference: no name\n");
7850 *str = ptr;
7851 return(NULL);
7852 }
7853 cur = *ptr;
7854 if (cur != ';') {
7855 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7856 xmlFree(name);
7857 *str = ptr;
7858 return(NULL);
7859 }
7860 ptr++;
7861
7862 /*
7863 * Increate the number of entity references parsed
7864 */
7865 ctxt->nbentities++;
7866
7867 /*
7868 * Request the entity from SAX
7869 */
7870 if ((ctxt->sax != NULL) &&
7871 (ctxt->sax->getParameterEntity != NULL))
7872 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7873 name);
7874 if (entity == NULL) {
7875 /*
7876 * [ WFC: Entity Declared ]
7877 * In a document without any DTD, a document with only an
7878 * internal DTD subset which contains no parameter entity
7879 * references, or a document with "standalone='yes'", ...
7880 * ... The declaration of a parameter entity must precede
7881 * any reference to it...
7882 */
7883 if ((ctxt->standalone == 1) ||
7884 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7885 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7886 "PEReference: %%%s; not found\n", name);
7887 } else {
7888 /*
7889 * [ VC: Entity Declared ]
7890 * In a document with an external subset or external
7891 * parameter entities with "standalone='no'", ...
7892 * ... The declaration of a parameter entity must
7893 * precede any reference to it...
7894 */
7895 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7896 "PEReference: %%%s; not found\n",
7897 name, NULL);
7898 ctxt->valid = 0;
7899 }
7900 } else {
7901 /*
7902 * Internal checking in case the entity quest barfed
7903 */
7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "%%%s; is not a parameter entity\n",
7908 name, NULL);
7909 }
7910 }
7911 ctxt->hasPErefs = 1;
7912 xmlFree(name);
7913 *str = ptr;
7914 return(entity);
7915 }
7916
7917 /**
7918 * xmlParseDocTypeDecl:
7919 * @ctxt: an XML parser context
7920 *
7921 * parse a DOCTYPE declaration
7922 *
7923 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7924 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7925 *
7926 * [ VC: Root Element Type ]
7927 * The Name in the document type declaration must match the element
7928 * type of the root element.
7929 */
7930
7931 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7932 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7933 const xmlChar *name = NULL;
7934 xmlChar *ExternalID = NULL;
7935 xmlChar *URI = NULL;
7936
7937 /*
7938 * We know that '<!DOCTYPE' has been detected.
7939 */
7940 SKIP(9);
7941
7942 SKIP_BLANKS;
7943
7944 /*
7945 * Parse the DOCTYPE name.
7946 */
7947 name = xmlParseName(ctxt);
7948 if (name == NULL) {
7949 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7950 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7951 }
7952 ctxt->intSubName = name;
7953
7954 SKIP_BLANKS;
7955
7956 /*
7957 * Check for SystemID and ExternalID
7958 */
7959 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7960
7961 if ((URI != NULL) || (ExternalID != NULL)) {
7962 ctxt->hasExternalSubset = 1;
7963 }
7964 ctxt->extSubURI = URI;
7965 ctxt->extSubSystem = ExternalID;
7966
7967 SKIP_BLANKS;
7968
7969 /*
7970 * Create and update the internal subset.
7971 */
7972 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7973 (!ctxt->disableSAX))
7974 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7975
7976 /*
7977 * Is there any internal subset declarations ?
7978 * they are handled separately in xmlParseInternalSubset()
7979 */
7980 if (RAW == '[')
7981 return;
7982
7983 /*
7984 * We should be at the end of the DOCTYPE declaration.
7985 */
7986 if (RAW != '>') {
7987 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7988 }
7989 NEXT;
7990 }
7991
7992 /**
7993 * xmlParseInternalSubset:
7994 * @ctxt: an XML parser context
7995 *
7996 * parse the internal subset declaration
7997 *
7998 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7999 */
8000
8001 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8002 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8003 /*
8004 * Is there any DTD definition ?
8005 */
8006 if (RAW == '[') {
8007 ctxt->instate = XML_PARSER_DTD;
8008 NEXT;
8009 /*
8010 * Parse the succession of Markup declarations and
8011 * PEReferences.
8012 * Subsequence (markupdecl | PEReference | S)*
8013 */
8014 while (RAW != ']') {
8015 const xmlChar *check = CUR_PTR;
8016 unsigned int cons = ctxt->input->consumed;
8017
8018 SKIP_BLANKS;
8019 xmlParseMarkupDecl(ctxt);
8020 xmlParsePEReference(ctxt);
8021
8022 /*
8023 * Pop-up of finished entities.
8024 */
8025 while ((RAW == 0) && (ctxt->inputNr > 1))
8026 xmlPopInput(ctxt);
8027
8028 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8029 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8030 "xmlParseInternalSubset: error detected in Markup declaration\n");
8031 break;
8032 }
8033 }
8034 if (RAW == ']') {
8035 NEXT;
8036 SKIP_BLANKS;
8037 }
8038 }
8039
8040 /*
8041 * We should be at the end of the DOCTYPE declaration.
8042 */
8043 if (RAW != '>') {
8044 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8045 }
8046 NEXT;
8047 }
8048
8049 #ifdef LIBXML_SAX1_ENABLED
8050 /**
8051 * xmlParseAttribute:
8052 * @ctxt: an XML parser context
8053 * @value: a xmlChar ** used to store the value of the attribute
8054 *
8055 * parse an attribute
8056 *
8057 * [41] Attribute ::= Name Eq AttValue
8058 *
8059 * [ WFC: No External Entity References ]
8060 * Attribute values cannot contain direct or indirect entity references
8061 * to external entities.
8062 *
8063 * [ WFC: No < in Attribute Values ]
8064 * The replacement text of any entity referred to directly or indirectly in
8065 * an attribute value (other than "<") must not contain a <.
8066 *
8067 * [ VC: Attribute Value Type ]
8068 * The attribute must have been declared; the value must be of the type
8069 * declared for it.
8070 *
8071 * [25] Eq ::= S? '=' S?
8072 *
8073 * With namespace:
8074 *
8075 * [NS 11] Attribute ::= QName Eq AttValue
8076 *
8077 * Also the case QName == xmlns:??? is handled independently as a namespace
8078 * definition.
8079 *
8080 * Returns the attribute name, and the value in *value.
8081 */
8082
8083 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8084 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8085 const xmlChar *name;
8086 xmlChar *val;
8087
8088 *value = NULL;
8089 GROW;
8090 name = xmlParseName(ctxt);
8091 if (name == NULL) {
8092 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8093 "error parsing attribute name\n");
8094 return(NULL);
8095 }
8096
8097 /*
8098 * read the value
8099 */
8100 SKIP_BLANKS;
8101 if (RAW == '=') {
8102 NEXT;
8103 SKIP_BLANKS;
8104 val = xmlParseAttValue(ctxt);
8105 ctxt->instate = XML_PARSER_CONTENT;
8106 } else {
8107 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8108 "Specification mandate value for attribute %s\n", name);
8109 return(NULL);
8110 }
8111
8112 /*
8113 * Check that xml:lang conforms to the specification
8114 * No more registered as an error, just generate a warning now
8115 * since this was deprecated in XML second edition
8116 */
8117 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8118 if (!xmlCheckLanguageID(val)) {
8119 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8120 "Malformed value for xml:lang : %s\n",
8121 val, NULL);
8122 }
8123 }
8124
8125 /*
8126 * Check that xml:space conforms to the specification
8127 */
8128 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8129 if (xmlStrEqual(val, BAD_CAST "default"))
8130 *(ctxt->space) = 0;
8131 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8132 *(ctxt->space) = 1;
8133 else {
8134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8136 val, NULL);
8137 }
8138 }
8139
8140 *value = val;
8141 return(name);
8142 }
8143
8144 /**
8145 * xmlParseStartTag:
8146 * @ctxt: an XML parser context
8147 *
8148 * parse a start of tag either for rule element or
8149 * EmptyElement. In both case we don't parse the tag closing chars.
8150 *
8151 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8152 *
8153 * [ WFC: Unique Att Spec ]
8154 * No attribute name may appear more than once in the same start-tag or
8155 * empty-element tag.
8156 *
8157 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8158 *
8159 * [ WFC: Unique Att Spec ]
8160 * No attribute name may appear more than once in the same start-tag or
8161 * empty-element tag.
8162 *
8163 * With namespace:
8164 *
8165 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8166 *
8167 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8168 *
8169 * Returns the element name parsed
8170 */
8171
8172 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8173 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8174 const xmlChar *name;
8175 const xmlChar *attname;
8176 xmlChar *attvalue;
8177 const xmlChar **atts = ctxt->atts;
8178 int nbatts = 0;
8179 int maxatts = ctxt->maxatts;
8180 int i;
8181
8182 if (RAW != '<') return(NULL);
8183 NEXT1;
8184
8185 name = xmlParseName(ctxt);
8186 if (name == NULL) {
8187 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188 "xmlParseStartTag: invalid element name\n");
8189 return(NULL);
8190 }
8191
8192 /*
8193 * Now parse the attributes, it ends up with the ending
8194 *
8195 * (S Attribute)* S?
8196 */
8197 SKIP_BLANKS;
8198 GROW;
8199
8200 while ((RAW != '>') &&
8201 ((RAW != '/') || (NXT(1) != '>')) &&
8202 (IS_BYTE_CHAR(RAW))) {
8203 const xmlChar *q = CUR_PTR;
8204 unsigned int cons = ctxt->input->consumed;
8205
8206 attname = xmlParseAttribute(ctxt, &attvalue);
8207 if ((attname != NULL) && (attvalue != NULL)) {
8208 /*
8209 * [ WFC: Unique Att Spec ]
8210 * No attribute name may appear more than once in the same
8211 * start-tag or empty-element tag.
8212 */
8213 for (i = 0; i < nbatts;i += 2) {
8214 if (xmlStrEqual(atts[i], attname)) {
8215 xmlErrAttributeDup(ctxt, NULL, attname);
8216 xmlFree(attvalue);
8217 goto failed;
8218 }
8219 }
8220 /*
8221 * Add the pair to atts
8222 */
8223 if (atts == NULL) {
8224 maxatts = 22; /* allow for 10 attrs by default */
8225 atts = (const xmlChar **)
8226 xmlMalloc(maxatts * sizeof(xmlChar *));
8227 if (atts == NULL) {
8228 xmlErrMemory(ctxt, NULL);
8229 if (attvalue != NULL)
8230 xmlFree(attvalue);
8231 goto failed;
8232 }
8233 ctxt->atts = atts;
8234 ctxt->maxatts = maxatts;
8235 } else if (nbatts + 4 > maxatts) {
8236 const xmlChar **n;
8237
8238 maxatts *= 2;
8239 n = (const xmlChar **) xmlRealloc((void *) atts,
8240 maxatts * sizeof(const xmlChar *));
8241 if (n == NULL) {
8242 xmlErrMemory(ctxt, NULL);
8243 if (attvalue != NULL)
8244 xmlFree(attvalue);
8245 goto failed;
8246 }
8247 atts = n;
8248 ctxt->atts = atts;
8249 ctxt->maxatts = maxatts;
8250 }
8251 atts[nbatts++] = attname;
8252 atts[nbatts++] = attvalue;
8253 atts[nbatts] = NULL;
8254 atts[nbatts + 1] = NULL;
8255 } else {
8256 if (attvalue != NULL)
8257 xmlFree(attvalue);
8258 }
8259
8260 failed:
8261
8262 GROW
8263 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8264 break;
8265 if (!IS_BLANK_CH(RAW)) {
8266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8267 "attributes construct error\n");
8268 }
8269 SKIP_BLANKS;
8270 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8271 (attname == NULL) && (attvalue == NULL)) {
8272 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8273 "xmlParseStartTag: problem parsing attributes\n");
8274 break;
8275 }
8276 SHRINK;
8277 GROW;
8278 }
8279
8280 /*
8281 * SAX: Start of Element !
8282 */
8283 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8284 (!ctxt->disableSAX)) {
8285 if (nbatts > 0)
8286 ctxt->sax->startElement(ctxt->userData, name, atts);
8287 else
8288 ctxt->sax->startElement(ctxt->userData, name, NULL);
8289 }
8290
8291 if (atts != NULL) {
8292 /* Free only the content strings */
8293 for (i = 1;i < nbatts;i+=2)
8294 if (atts[i] != NULL)
8295 xmlFree((xmlChar *) atts[i]);
8296 }
8297 return(name);
8298 }
8299
8300 /**
8301 * xmlParseEndTag1:
8302 * @ctxt: an XML parser context
8303 * @line: line of the start tag
8304 * @nsNr: number of namespaces on the start tag
8305 *
8306 * parse an end of tag
8307 *
8308 * [42] ETag ::= '</' Name S? '>'
8309 *
8310 * With namespace
8311 *
8312 * [NS 9] ETag ::= '</' QName S? '>'
8313 */
8314
8315 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8316 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8317 const xmlChar *name;
8318
8319 GROW;
8320 if ((RAW != '<') || (NXT(1) != '/')) {
8321 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8322 "xmlParseEndTag: '</' not found\n");
8323 return;
8324 }
8325 SKIP(2);
8326
8327 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8328
8329 /*
8330 * We should definitely be at the ending "S? '>'" part
8331 */
8332 GROW;
8333 SKIP_BLANKS;
8334 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8335 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8336 } else
8337 NEXT1;
8338
8339 /*
8340 * [ WFC: Element Type Match ]
8341 * The Name in an element's end-tag must match the element type in the
8342 * start-tag.
8343 *
8344 */
8345 if (name != (xmlChar*)1) {
8346 if (name == NULL) name = BAD_CAST "unparseable";
8347 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8348 "Opening and ending tag mismatch: %s line %d and %s\n",
8349 ctxt->name, line, name);
8350 }
8351
8352 /*
8353 * SAX: End of Tag
8354 */
8355 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8356 (!ctxt->disableSAX))
8357 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8358
8359 namePop(ctxt);
8360 spacePop(ctxt);
8361 return;
8362 }
8363
8364 /**
8365 * xmlParseEndTag:
8366 * @ctxt: an XML parser context
8367 *
8368 * parse an end of tag
8369 *
8370 * [42] ETag ::= '</' Name S? '>'
8371 *
8372 * With namespace
8373 *
8374 * [NS 9] ETag ::= '</' QName S? '>'
8375 */
8376
8377 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8378 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8379 xmlParseEndTag1(ctxt, 0);
8380 }
8381 #endif /* LIBXML_SAX1_ENABLED */
8382
8383 /************************************************************************
8384 * *
8385 * SAX 2 specific operations *
8386 * *
8387 ************************************************************************/
8388
8389 /*
8390 * xmlGetNamespace:
8391 * @ctxt: an XML parser context
8392 * @prefix: the prefix to lookup
8393 *
8394 * Lookup the namespace name for the @prefix (which ca be NULL)
8395 * The prefix must come from the @ctxt->dict dictionnary
8396 *
8397 * Returns the namespace name or NULL if not bound
8398 */
8399 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8400 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8401 int i;
8402
8403 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8404 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8405 if (ctxt->nsTab[i] == prefix) {
8406 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8407 return(NULL);
8408 return(ctxt->nsTab[i + 1]);
8409 }
8410 return(NULL);
8411 }
8412
8413 /**
8414 * xmlParseQName:
8415 * @ctxt: an XML parser context
8416 * @prefix: pointer to store the prefix part
8417 *
8418 * parse an XML Namespace QName
8419 *
8420 * [6] QName ::= (Prefix ':')? LocalPart
8421 * [7] Prefix ::= NCName
8422 * [8] LocalPart ::= NCName
8423 *
8424 * Returns the Name parsed or NULL
8425 */
8426
8427 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8428 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8429 const xmlChar *l, *p;
8430
8431 GROW;
8432
8433 l = xmlParseNCName(ctxt);
8434 if (l == NULL) {
8435 if (CUR == ':') {
8436 l = xmlParseName(ctxt);
8437 if (l != NULL) {
8438 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8439 "Failed to parse QName '%s'\n", l, NULL, NULL);
8440 *prefix = NULL;
8441 return(l);
8442 }
8443 }
8444 return(NULL);
8445 }
8446 if (CUR == ':') {
8447 NEXT;
8448 p = l;
8449 l = xmlParseNCName(ctxt);
8450 if (l == NULL) {
8451 xmlChar *tmp;
8452
8453 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8454 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8455 l = xmlParseNmtoken(ctxt);
8456 if (l == NULL)
8457 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8458 else {
8459 tmp = xmlBuildQName(l, p, NULL, 0);
8460 xmlFree((char *)l);
8461 }
8462 p = xmlDictLookup(ctxt->dict, tmp, -1);
8463 if (tmp != NULL) xmlFree(tmp);
8464 *prefix = NULL;
8465 return(p);
8466 }
8467 if (CUR == ':') {
8468 xmlChar *tmp;
8469
8470 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8471 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8472 NEXT;
8473 tmp = (xmlChar *) xmlParseName(ctxt);
8474 if (tmp != NULL) {
8475 tmp = xmlBuildQName(tmp, l, NULL, 0);
8476 l = xmlDictLookup(ctxt->dict, tmp, -1);
8477 if (tmp != NULL) xmlFree(tmp);
8478 *prefix = p;
8479 return(l);
8480 }
8481 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8482 l = xmlDictLookup(ctxt->dict, tmp, -1);
8483 if (tmp != NULL) xmlFree(tmp);
8484 *prefix = p;
8485 return(l);
8486 }
8487 *prefix = p;
8488 } else
8489 *prefix = NULL;
8490 return(l);
8491 }
8492
8493 /**
8494 * xmlParseQNameAndCompare:
8495 * @ctxt: an XML parser context
8496 * @name: the localname
8497 * @prefix: the prefix, if any.
8498 *
8499 * parse an XML name and compares for match
8500 * (specialized for endtag parsing)
8501 *
8502 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8503 * and the name for mismatch
8504 */
8505
8506 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8507 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8508 xmlChar const *prefix) {
8509 const xmlChar *cmp;
8510 const xmlChar *in;
8511 const xmlChar *ret;
8512 const xmlChar *prefix2;
8513
8514 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8515
8516 GROW;
8517 in = ctxt->input->cur;
8518
8519 cmp = prefix;
8520 while (*in != 0 && *in == *cmp) {
8521 ++in;
8522 ++cmp;
8523 }
8524 if ((*cmp == 0) && (*in == ':')) {
8525 in++;
8526 cmp = name;
8527 while (*in != 0 && *in == *cmp) {
8528 ++in;
8529 ++cmp;
8530 }
8531 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8532 /* success */
8533 ctxt->input->cur = in;
8534 return((const xmlChar*) 1);
8535 }
8536 }
8537 /*
8538 * all strings coms from the dictionary, equality can be done directly
8539 */
8540 ret = xmlParseQName (ctxt, &prefix2);
8541 if ((ret == name) && (prefix == prefix2))
8542 return((const xmlChar*) 1);
8543 return ret;
8544 }
8545
8546 /**
8547 * xmlParseAttValueInternal:
8548 * @ctxt: an XML parser context
8549 * @len: attribute len result
8550 * @alloc: whether the attribute was reallocated as a new string
8551 * @normalize: if 1 then further non-CDATA normalization must be done
8552 *
8553 * parse a value for an attribute.
8554 * NOTE: if no normalization is needed, the routine will return pointers
8555 * directly from the data buffer.
8556 *
8557 * 3.3.3 Attribute-Value Normalization:
8558 * Before the value of an attribute is passed to the application or
8559 * checked for validity, the XML processor must normalize it as follows:
8560 * - a character reference is processed by appending the referenced
8561 * character to the attribute value
8562 * - an entity reference is processed by recursively processing the
8563 * replacement text of the entity
8564 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8565 * appending #x20 to the normalized value, except that only a single
8566 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8567 * parsed entity or the literal entity value of an internal parsed entity
8568 * - other characters are processed by appending them to the normalized value
8569 * If the declared value is not CDATA, then the XML processor must further
8570 * process the normalized attribute value by discarding any leading and
8571 * trailing space (#x20) characters, and by replacing sequences of space
8572 * (#x20) characters by a single space (#x20) character.
8573 * All attributes for which no declaration has been read should be treated
8574 * by a non-validating parser as if declared CDATA.
8575 *
8576 * Returns the AttValue parsed or NULL. The value has to be freed by the
8577 * caller if it was copied, this can be detected by val[*len] == 0.
8578 */
8579
8580 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8581 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8582 int normalize)
8583 {
8584 xmlChar limit = 0;
8585 const xmlChar *in = NULL, *start, *end, *last;
8586 xmlChar *ret = NULL;
8587
8588 GROW;
8589 in = (xmlChar *) CUR_PTR;
8590 if (*in != '"' && *in != '\'') {
8591 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8592 return (NULL);
8593 }
8594 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8595
8596 /*
8597 * try to handle in this routine the most common case where no
8598 * allocation of a new string is required and where content is
8599 * pure ASCII.
8600 */
8601 limit = *in++;
8602 end = ctxt->input->end;
8603 start = in;
8604 if (in >= end) {
8605 const xmlChar *oldbase = ctxt->input->base;
8606 GROW;
8607 if (oldbase != ctxt->input->base) {
8608 long delta = ctxt->input->base - oldbase;
8609 start = start + delta;
8610 in = in + delta;
8611 }
8612 end = ctxt->input->end;
8613 }
8614 if (normalize) {
8615 /*
8616 * Skip any leading spaces
8617 */
8618 while ((in < end) && (*in != limit) &&
8619 ((*in == 0x20) || (*in == 0x9) ||
8620 (*in == 0xA) || (*in == 0xD))) {
8621 in++;
8622 start = in;
8623 if (in >= end) {
8624 const xmlChar *oldbase = ctxt->input->base;
8625 GROW;
8626 if (oldbase != ctxt->input->base) {
8627 long delta = ctxt->input->base - oldbase;
8628 start = start + delta;
8629 in = in + delta;
8630 }
8631 end = ctxt->input->end;
8632 }
8633 }
8634 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8635 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8636 if ((*in++ == 0x20) && (*in == 0x20)) break;
8637 if (in >= end) {
8638 const xmlChar *oldbase = ctxt->input->base;
8639 GROW;
8640 if (oldbase != ctxt->input->base) {
8641 long delta = ctxt->input->base - oldbase;
8642 start = start + delta;
8643 in = in + delta;
8644 }
8645 end = ctxt->input->end;
8646 }
8647 }
8648 last = in;
8649 /*
8650 * skip the trailing blanks
8651 */
8652 while ((last[-1] == 0x20) && (last > start)) last--;
8653 while ((in < end) && (*in != limit) &&
8654 ((*in == 0x20) || (*in == 0x9) ||
8655 (*in == 0xA) || (*in == 0xD))) {
8656 in++;
8657 if (in >= end) {
8658 const xmlChar *oldbase = ctxt->input->base;
8659 GROW;
8660 if (oldbase != ctxt->input->base) {
8661 long delta = ctxt->input->base - oldbase;
8662 start = start + delta;
8663 in = in + delta;
8664 last = last + delta;
8665 }
8666 end = ctxt->input->end;
8667 }
8668 }
8669 if (*in != limit) goto need_complex;
8670 } else {
8671 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8672 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8673 in++;
8674 if (in >= end) {
8675 const xmlChar *oldbase = ctxt->input->base;
8676 GROW;
8677 if (oldbase != ctxt->input->base) {
8678 long delta = ctxt->input->base - oldbase;
8679 start = start + delta;
8680 in = in + delta;
8681 }
8682 end = ctxt->input->end;
8683 }
8684 }
8685 last = in;
8686 if (*in != limit) goto need_complex;
8687 }
8688 in++;
8689 if (len != NULL) {
8690 *len = last - start;
8691 ret = (xmlChar *) start;
8692 } else {
8693 if (alloc) *alloc = 1;
8694 ret = xmlStrndup(start, last - start);
8695 }
8696 CUR_PTR = in;
8697 if (alloc) *alloc = 0;
8698 return ret;
8699 need_complex:
8700 if (alloc) *alloc = 1;
8701 return xmlParseAttValueComplex(ctxt, len, normalize);
8702 }
8703
8704 /**
8705 * xmlParseAttribute2:
8706 * @ctxt: an XML parser context
8707 * @pref: the element prefix
8708 * @elem: the element name
8709 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8710 * @value: a xmlChar ** used to store the value of the attribute
8711 * @len: an int * to save the length of the attribute
8712 * @alloc: an int * to indicate if the attribute was allocated
8713 *
8714 * parse an attribute in the new SAX2 framework.
8715 *
8716 * Returns the attribute name, and the value in *value, .
8717 */
8718
8719 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8720 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8721 const xmlChar * pref, const xmlChar * elem,
8722 const xmlChar ** prefix, xmlChar ** value,
8723 int *len, int *alloc)
8724 {
8725 const xmlChar *name;
8726 xmlChar *val, *internal_val = NULL;
8727 int normalize = 0;
8728
8729 *value = NULL;
8730 GROW;
8731 name = xmlParseQName(ctxt, prefix);
8732 if (name == NULL) {
8733 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8734 "error parsing attribute name\n");
8735 return (NULL);
8736 }
8737
8738 /*
8739 * get the type if needed
8740 */
8741 if (ctxt->attsSpecial != NULL) {
8742 int type;
8743
8744 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8745 pref, elem, *prefix, name);
8746 if (type != 0)
8747 normalize = 1;
8748 }
8749
8750 /*
8751 * read the value
8752 */
8753 SKIP_BLANKS;
8754 if (RAW == '=') {
8755 NEXT;
8756 SKIP_BLANKS;
8757 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8758 if (normalize) {
8759 /*
8760 * Sometimes a second normalisation pass for spaces is needed
8761 * but that only happens if charrefs or entities refernces
8762 * have been used in the attribute value, i.e. the attribute
8763 * value have been extracted in an allocated string already.
8764 */
8765 if (*alloc) {
8766 const xmlChar *val2;
8767
8768 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8769 if ((val2 != NULL) && (val2 != val)) {
8770 xmlFree(val);
8771 val = (xmlChar *) val2;
8772 }
8773 }
8774 }
8775 ctxt->instate = XML_PARSER_CONTENT;
8776 } else {
8777 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8778 "Specification mandate value for attribute %s\n",
8779 name);
8780 return (NULL);
8781 }
8782
8783 if (*prefix == ctxt->str_xml) {
8784 /*
8785 * Check that xml:lang conforms to the specification
8786 * No more registered as an error, just generate a warning now
8787 * since this was deprecated in XML second edition
8788 */
8789 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8790 internal_val = xmlStrndup(val, *len);
8791 if (!xmlCheckLanguageID(internal_val)) {
8792 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8793 "Malformed value for xml:lang : %s\n",
8794 internal_val, NULL);
8795 }
8796 }
8797
8798 /*
8799 * Check that xml:space conforms to the specification
8800 */
8801 if (xmlStrEqual(name, BAD_CAST "space")) {
8802 internal_val = xmlStrndup(val, *len);
8803 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8804 *(ctxt->space) = 0;
8805 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8806 *(ctxt->space) = 1;
8807 else {
8808 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8809 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8810 internal_val, NULL);
8811 }
8812 }
8813 if (internal_val) {
8814 xmlFree(internal_val);
8815 }
8816 }
8817
8818 *value = val;
8819 return (name);
8820 }
8821 /**
8822 * xmlParseStartTag2:
8823 * @ctxt: an XML parser context
8824 *
8825 * parse a start of tag either for rule element or
8826 * EmptyElement. In both case we don't parse the tag closing chars.
8827 * This routine is called when running SAX2 parsing
8828 *
8829 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8830 *
8831 * [ WFC: Unique Att Spec ]
8832 * No attribute name may appear more than once in the same start-tag or
8833 * empty-element tag.
8834 *
8835 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8836 *
8837 * [ WFC: Unique Att Spec ]
8838 * No attribute name may appear more than once in the same start-tag or
8839 * empty-element tag.
8840 *
8841 * With namespace:
8842 *
8843 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8844 *
8845 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8846 *
8847 * Returns the element name parsed
8848 */
8849
8850 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8851 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8852 const xmlChar **URI, int *tlen) {
8853 const xmlChar *localname;
8854 const xmlChar *prefix;
8855 const xmlChar *attname;
8856 const xmlChar *aprefix;
8857 const xmlChar *nsname;
8858 xmlChar *attvalue;
8859 const xmlChar **atts = ctxt->atts;
8860 int maxatts = ctxt->maxatts;
8861 int nratts, nbatts, nbdef;
8862 int i, j, nbNs, attval, oldline, oldcol;
8863 const xmlChar *base;
8864 unsigned long cur;
8865 int nsNr = ctxt->nsNr;
8866
8867 if (RAW != '<') return(NULL);
8868 NEXT1;
8869
8870 /*
8871 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8872 * point since the attribute values may be stored as pointers to
8873 * the buffer and calling SHRINK would destroy them !
8874 * The Shrinking is only possible once the full set of attribute
8875 * callbacks have been done.
8876 */
8877 reparse:
8878 SHRINK;
8879 base = ctxt->input->base;
8880 cur = ctxt->input->cur - ctxt->input->base;
8881 oldline = ctxt->input->line;
8882 oldcol = ctxt->input->col;
8883 nbatts = 0;
8884 nratts = 0;
8885 nbdef = 0;
8886 nbNs = 0;
8887 attval = 0;
8888 /* Forget any namespaces added during an earlier parse of this element. */
8889 ctxt->nsNr = nsNr;
8890
8891 localname = xmlParseQName(ctxt, &prefix);
8892 if (localname == NULL) {
8893 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8894 "StartTag: invalid element name\n");
8895 return(NULL);
8896 }
8897 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8898
8899 /*
8900 * Now parse the attributes, it ends up with the ending
8901 *
8902 * (S Attribute)* S?
8903 */
8904 SKIP_BLANKS;
8905 GROW;
8906 if (ctxt->input->base != base) goto base_changed;
8907
8908 while ((RAW != '>') &&
8909 ((RAW != '/') || (NXT(1) != '>')) &&
8910 (IS_BYTE_CHAR(RAW))) {
8911 const xmlChar *q = CUR_PTR;
8912 unsigned int cons = ctxt->input->consumed;
8913 int len = -1, alloc = 0;
8914
8915 attname = xmlParseAttribute2(ctxt, prefix, localname,
8916 &aprefix, &attvalue, &len, &alloc);
8917 if (ctxt->input->base != base) {
8918 if ((attvalue != NULL) && (alloc != 0))
8919 xmlFree(attvalue);
8920 attvalue = NULL;
8921 goto base_changed;
8922 }
8923 if ((attname != NULL) && (attvalue != NULL)) {
8924 if (len < 0) len = xmlStrlen(attvalue);
8925 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8926 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8927 xmlURIPtr uri;
8928
8929 if (*URL != 0) {
8930 uri = xmlParseURI((const char *) URL);
8931 if (uri == NULL) {
8932 xmlNsErr(ctxt, XML_WAR_NS_URI,
8933 "xmlns: '%s' is not a valid URI\n",
8934 URL, NULL, NULL);
8935 } else {
8936 if (uri->scheme == NULL) {
8937 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8938 "xmlns: URI %s is not absolute\n",
8939 URL, NULL, NULL);
8940 }
8941 xmlFreeURI(uri);
8942 }
8943 if (URL == ctxt->str_xml_ns) {
8944 if (attname != ctxt->str_xml) {
8945 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8946 "xml namespace URI cannot be the default namespace\n",
8947 NULL, NULL, NULL);
8948 }
8949 goto skip_default_ns;
8950 }
8951 if ((len == 29) &&
8952 (xmlStrEqual(URL,
8953 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8954 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8955 "reuse of the xmlns namespace name is forbidden\n",
8956 NULL, NULL, NULL);
8957 goto skip_default_ns;
8958 }
8959 }
8960 /*
8961 * check that it's not a defined namespace
8962 */
8963 for (j = 1;j <= nbNs;j++)
8964 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8965 break;
8966 if (j <= nbNs)
8967 xmlErrAttributeDup(ctxt, NULL, attname);
8968 else
8969 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8970 skip_default_ns:
8971 if (alloc != 0) xmlFree(attvalue);
8972 SKIP_BLANKS;
8973 continue;
8974 }
8975 if (aprefix == ctxt->str_xmlns) {
8976 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8977 xmlURIPtr uri;
8978
8979 if (attname == ctxt->str_xml) {
8980 if (URL != ctxt->str_xml_ns) {
8981 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8982 "xml namespace prefix mapped to wrong URI\n",
8983 NULL, NULL, NULL);
8984 }
8985 /*
8986 * Do not keep a namespace definition node
8987 */
8988 goto skip_ns;
8989 }
8990 if (URL == ctxt->str_xml_ns) {
8991 if (attname != ctxt->str_xml) {
8992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 "xml namespace URI mapped to wrong prefix\n",
8994 NULL, NULL, NULL);
8995 }
8996 goto skip_ns;
8997 }
8998 if (attname == ctxt->str_xmlns) {
8999 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9000 "redefinition of the xmlns prefix is forbidden\n",
9001 NULL, NULL, NULL);
9002 goto skip_ns;
9003 }
9004 if ((len == 29) &&
9005 (xmlStrEqual(URL,
9006 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9007 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9008 "reuse of the xmlns namespace name is forbidden\n",
9009 NULL, NULL, NULL);
9010 goto skip_ns;
9011 }
9012 if ((URL == NULL) || (URL[0] == 0)) {
9013 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9014 "xmlns:%s: Empty XML namespace is not allowed\n",
9015 attname, NULL, NULL);
9016 goto skip_ns;
9017 } else {
9018 uri = xmlParseURI((const char *) URL);
9019 if (uri == NULL) {
9020 xmlNsErr(ctxt, XML_WAR_NS_URI,
9021 "xmlns:%s: '%s' is not a valid URI\n",
9022 attname, URL, NULL);
9023 } else {
9024 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9025 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9026 "xmlns:%s: URI %s is not absolute\n",
9027 attname, URL, NULL);
9028 }
9029 xmlFreeURI(uri);
9030 }
9031 }
9032
9033 /*
9034 * check that it's not a defined namespace
9035 */
9036 for (j = 1;j <= nbNs;j++)
9037 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9038 break;
9039 if (j <= nbNs)
9040 xmlErrAttributeDup(ctxt, aprefix, attname);
9041 else
9042 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9043 skip_ns:
9044 if (alloc != 0) xmlFree(attvalue);
9045 SKIP_BLANKS;
9046 if (ctxt->input->base != base) goto base_changed;
9047 continue;
9048 }
9049
9050 /*
9051 * Add the pair to atts
9052 */
9053 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9054 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9055 if (attvalue[len] == 0)
9056 xmlFree(attvalue);
9057 goto failed;
9058 }
9059 maxatts = ctxt->maxatts;
9060 atts = ctxt->atts;
9061 }
9062 ctxt->attallocs[nratts++] = alloc;
9063 atts[nbatts++] = attname;
9064 atts[nbatts++] = aprefix;
9065 atts[nbatts++] = NULL; /* the URI will be fetched later */
9066 atts[nbatts++] = attvalue;
9067 attvalue += len;
9068 atts[nbatts++] = attvalue;
9069 /*
9070 * tag if some deallocation is needed
9071 */
9072 if (alloc != 0) attval = 1;
9073 } else {
9074 if ((attvalue != NULL) && (attvalue[len] == 0))
9075 xmlFree(attvalue);
9076 }
9077
9078 failed:
9079
9080 GROW
9081 if (ctxt->input->base != base) goto base_changed;
9082 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9083 break;
9084 if (!IS_BLANK_CH(RAW)) {
9085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9086 "attributes construct error\n");
9087 break;
9088 }
9089 SKIP_BLANKS;
9090 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9091 (attname == NULL) && (attvalue == NULL)) {
9092 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9093 "xmlParseStartTag: problem parsing attributes\n");
9094 break;
9095 }
9096 GROW;
9097 if (ctxt->input->base != base) goto base_changed;
9098 }
9099
9100 /*
9101 * The attributes defaulting
9102 */
9103 if (ctxt->attsDefault != NULL) {
9104 xmlDefAttrsPtr defaults;
9105
9106 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9107 if (defaults != NULL) {
9108 for (i = 0;i < defaults->nbAttrs;i++) {
9109 attname = defaults->values[5 * i];
9110 aprefix = defaults->values[5 * i + 1];
9111
9112 /*
9113 * special work for namespaces defaulted defs
9114 */
9115 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9116 /*
9117 * check that it's not a defined namespace
9118 */
9119 for (j = 1;j <= nbNs;j++)
9120 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9121 break;
9122 if (j <= nbNs) continue;
9123
9124 nsname = xmlGetNamespace(ctxt, NULL);
9125 if (nsname != defaults->values[5 * i + 2]) {
9126 if (nsPush(ctxt, NULL,
9127 defaults->values[5 * i + 2]) > 0)
9128 nbNs++;
9129 }
9130 } else if (aprefix == ctxt->str_xmlns) {
9131 /*
9132 * check that it's not a defined namespace
9133 */
9134 for (j = 1;j <= nbNs;j++)
9135 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9136 break;
9137 if (j <= nbNs) continue;
9138
9139 nsname = xmlGetNamespace(ctxt, attname);
9140 if (nsname != defaults->values[2]) {
9141 if (nsPush(ctxt, attname,
9142 defaults->values[5 * i + 2]) > 0)
9143 nbNs++;
9144 }
9145 } else {
9146 /*
9147 * check that it's not a defined attribute
9148 */
9149 for (j = 0;j < nbatts;j+=5) {
9150 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9151 break;
9152 }
9153 if (j < nbatts) continue;
9154
9155 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9156 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9157 return(NULL);
9158 }
9159 maxatts = ctxt->maxatts;
9160 atts = ctxt->atts;
9161 }
9162 atts[nbatts++] = attname;
9163 atts[nbatts++] = aprefix;
9164 if (aprefix == NULL)
9165 atts[nbatts++] = NULL;
9166 else
9167 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9168 atts[nbatts++] = defaults->values[5 * i + 2];
9169 atts[nbatts++] = defaults->values[5 * i + 3];
9170 if ((ctxt->standalone == 1) &&
9171 (defaults->values[5 * i + 4] != NULL)) {
9172 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9173 "standalone: attribute %s on %s defaulted from external subset\n",
9174 attname, localname);
9175 }
9176 nbdef++;
9177 }
9178 }
9179 }
9180 }
9181
9182 /*
9183 * The attributes checkings
9184 */
9185 for (i = 0; i < nbatts;i += 5) {
9186 /*
9187 * The default namespace does not apply to attribute names.
9188 */
9189 if (atts[i + 1] != NULL) {
9190 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9191 if (nsname == NULL) {
9192 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9193 "Namespace prefix %s for %s on %s is not defined\n",
9194 atts[i + 1], atts[i], localname);
9195 }
9196 atts[i + 2] = nsname;
9197 } else
9198 nsname = NULL;
9199 /*
9200 * [ WFC: Unique Att Spec ]
9201 * No attribute name may appear more than once in the same
9202 * start-tag or empty-element tag.
9203 * As extended by the Namespace in XML REC.
9204 */
9205 for (j = 0; j < i;j += 5) {
9206 if (atts[i] == atts[j]) {
9207 if (atts[i+1] == atts[j+1]) {
9208 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9209 break;
9210 }
9211 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9212 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9213 "Namespaced Attribute %s in '%s' redefined\n",
9214 atts[i], nsname, NULL);
9215 break;
9216 }
9217 }
9218 }
9219 }
9220
9221 nsname = xmlGetNamespace(ctxt, prefix);
9222 if ((prefix != NULL) && (nsname == NULL)) {
9223 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9224 "Namespace prefix %s on %s is not defined\n",
9225 prefix, localname, NULL);
9226 }
9227 *pref = prefix;
9228 *URI = nsname;
9229
9230 /*
9231 * SAX: Start of Element !
9232 */
9233 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9234 (!ctxt->disableSAX)) {
9235 if (nbNs > 0)
9236 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9237 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9238 nbatts / 5, nbdef, atts);
9239 else
9240 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9241 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9242 }
9243
9244 /*
9245 * Free up attribute allocated strings if needed
9246 */
9247 if (attval != 0) {
9248 for (i = 3,j = 0; j < nratts;i += 5,j++)
9249 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9250 xmlFree((xmlChar *) atts[i]);
9251 }
9252
9253 return(localname);
9254
9255 base_changed:
9256 /*
9257 * the attribute strings are valid iif the base didn't changed
9258 */
9259 if (attval != 0) {
9260 for (i = 3,j = 0; j < nratts;i += 5,j++)
9261 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9262 xmlFree((xmlChar *) atts[i]);
9263 }
9264 ctxt->input->cur = ctxt->input->base + cur;
9265 ctxt->input->line = oldline;
9266 ctxt->input->col = oldcol;
9267 if (ctxt->wellFormed == 1) {
9268 goto reparse;
9269 }
9270 return(NULL);
9271 }
9272
9273 /**
9274 * xmlParseEndTag2:
9275 * @ctxt: an XML parser context
9276 * @line: line of the start tag
9277 * @nsNr: number of namespaces on the start tag
9278 *
9279 * parse an end of tag
9280 *
9281 * [42] ETag ::= '</' Name S? '>'
9282 *
9283 * With namespace
9284 *
9285 * [NS 9] ETag ::= '</' QName S? '>'
9286 */
9287
9288 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9289 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9290 const xmlChar *URI, int line, int nsNr, int tlen) {
9291 const xmlChar *name;
9292
9293 GROW;
9294 if ((RAW != '<') || (NXT(1) != '/')) {
9295 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9296 return;
9297 }
9298 SKIP(2);
9299
9300 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9301 if (ctxt->input->cur[tlen] == '>') {
9302 ctxt->input->cur += tlen + 1;
9303 goto done;
9304 }
9305 ctxt->input->cur += tlen;
9306 name = (xmlChar*)1;
9307 } else {
9308 if (prefix == NULL)
9309 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9310 else
9311 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9312 }
9313
9314 /*
9315 * We should definitely be at the ending "S? '>'" part
9316 */
9317 GROW;
9318 SKIP_BLANKS;
9319 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9320 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9321 } else
9322 NEXT1;
9323
9324 /*
9325 * [ WFC: Element Type Match ]
9326 * The Name in an element's end-tag must match the element type in the
9327 * start-tag.
9328 *
9329 */
9330 if (name != (xmlChar*)1) {
9331 if (name == NULL) name = BAD_CAST "unparseable";
9332 if ((line == 0) && (ctxt->node != NULL))
9333 line = ctxt->node->line;
9334 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9335 "Opening and ending tag mismatch: %s line %d and %s\n",
9336 ctxt->name, line, name);
9337 }
9338
9339 /*
9340 * SAX: End of Tag
9341 */
9342 done:
9343 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9344 (!ctxt->disableSAX))
9345 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9346
9347 spacePop(ctxt);
9348 if (nsNr != 0)
9349 nsPop(ctxt, nsNr);
9350 return;
9351 }
9352
9353 /**
9354 * xmlParseCDSect:
9355 * @ctxt: an XML parser context
9356 *
9357 * Parse escaped pure raw content.
9358 *
9359 * [18] CDSect ::= CDStart CData CDEnd
9360 *
9361 * [19] CDStart ::= '<![CDATA['
9362 *
9363 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9364 *
9365 * [21] CDEnd ::= ']]>'
9366 */
9367 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9368 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9369 xmlChar *buf = NULL;
9370 int len = 0;
9371 int size = XML_PARSER_BUFFER_SIZE;
9372 int r, rl;
9373 int s, sl;
9374 int cur, l;
9375 int count = 0;
9376
9377 /* Check 2.6.0 was NXT(0) not RAW */
9378 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9379 SKIP(9);
9380 } else
9381 return;
9382
9383 ctxt->instate = XML_PARSER_CDATA_SECTION;
9384 r = CUR_CHAR(rl);
9385 if (!IS_CHAR(r)) {
9386 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9387 ctxt->instate = XML_PARSER_CONTENT;
9388 return;
9389 }
9390 NEXTL(rl);
9391 s = CUR_CHAR(sl);
9392 if (!IS_CHAR(s)) {
9393 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9394 ctxt->instate = XML_PARSER_CONTENT;
9395 return;
9396 }
9397 NEXTL(sl);
9398 cur = CUR_CHAR(l);
9399 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9400 if (buf == NULL) {
9401 xmlErrMemory(ctxt, NULL);
9402 return;
9403 }
9404 while (IS_CHAR(cur) &&
9405 ((r != ']') || (s != ']') || (cur != '>'))) {
9406 if (len + 5 >= size) {
9407 xmlChar *tmp;
9408
9409 size *= 2;
9410 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9411 if (tmp == NULL) {
9412 xmlFree(buf);
9413 xmlErrMemory(ctxt, NULL);
9414 return;
9415 }
9416 buf = tmp;
9417 }
9418 COPY_BUF(rl,buf,len,r);
9419 r = s;
9420 rl = sl;
9421 s = cur;
9422 sl = l;
9423 count++;
9424 if (count > 50) {
9425 GROW;
9426 count = 0;
9427 }
9428 NEXTL(l);
9429 cur = CUR_CHAR(l);
9430 }
9431 buf[len] = 0;
9432 ctxt->instate = XML_PARSER_CONTENT;
9433 if (cur != '>') {
9434 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9435 "CData section not finished\n%.50s\n", buf);
9436 xmlFree(buf);
9437 return;
9438 }
9439 NEXTL(l);
9440
9441 /*
9442 * OK the buffer is to be consumed as cdata.
9443 */
9444 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9445 if (ctxt->sax->cdataBlock != NULL)
9446 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9447 else if (ctxt->sax->characters != NULL)
9448 ctxt->sax->characters(ctxt->userData, buf, len);
9449 }
9450 xmlFree(buf);
9451 }
9452
9453 /**
9454 * xmlParseContent:
9455 * @ctxt: an XML parser context
9456 *
9457 * Parse a content:
9458 *
9459 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9460 */
9461
9462 void
xmlParseContent(xmlParserCtxtPtr ctxt)9463 xmlParseContent(xmlParserCtxtPtr ctxt) {
9464 GROW;
9465 while ((RAW != 0) &&
9466 ((RAW != '<') || (NXT(1) != '/')) &&
9467 (ctxt->instate != XML_PARSER_EOF)) {
9468 const xmlChar *test = CUR_PTR;
9469 unsigned int cons = ctxt->input->consumed;
9470 const xmlChar *cur = ctxt->input->cur;
9471
9472 /*
9473 * First case : a Processing Instruction.
9474 */
9475 if ((*cur == '<') && (cur[1] == '?')) {
9476 xmlParsePI(ctxt);
9477 }
9478
9479 /*
9480 * Second case : a CDSection
9481 */
9482 /* 2.6.0 test was *cur not RAW */
9483 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9484 xmlParseCDSect(ctxt);
9485 }
9486
9487 /*
9488 * Third case : a comment
9489 */
9490 else if ((*cur == '<') && (NXT(1) == '!') &&
9491 (NXT(2) == '-') && (NXT(3) == '-')) {
9492 xmlParseComment(ctxt);
9493 ctxt->instate = XML_PARSER_CONTENT;
9494 }
9495
9496 /*
9497 * Fourth case : a sub-element.
9498 */
9499 else if (*cur == '<') {
9500 xmlParseElement(ctxt);
9501 }
9502
9503 /*
9504 * Fifth case : a reference. If if has not been resolved,
9505 * parsing returns it's Name, create the node
9506 */
9507
9508 else if (*cur == '&') {
9509 xmlParseReference(ctxt);
9510 }
9511
9512 /*
9513 * Last case, text. Note that References are handled directly.
9514 */
9515 else {
9516 xmlParseCharData(ctxt, 0);
9517 }
9518
9519 GROW;
9520 /*
9521 * Pop-up of finished entities.
9522 */
9523 while ((RAW == 0) && (ctxt->inputNr > 1))
9524 xmlPopInput(ctxt);
9525 SHRINK;
9526
9527 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9528 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9529 "detected an error in element content\n");
9530 ctxt->instate = XML_PARSER_EOF;
9531 break;
9532 }
9533 }
9534 }
9535
9536 /**
9537 * xmlParseElement:
9538 * @ctxt: an XML parser context
9539 *
9540 * parse an XML element, this is highly recursive
9541 *
9542 * [39] element ::= EmptyElemTag | STag content ETag
9543 *
9544 * [ WFC: Element Type Match ]
9545 * The Name in an element's end-tag must match the element type in the
9546 * start-tag.
9547 *
9548 */
9549
9550 void
xmlParseElement(xmlParserCtxtPtr ctxt)9551 xmlParseElement(xmlParserCtxtPtr ctxt) {
9552 const xmlChar *name;
9553 const xmlChar *prefix = NULL;
9554 const xmlChar *URI = NULL;
9555 xmlParserNodeInfo node_info;
9556 int line, tlen;
9557 xmlNodePtr ret;
9558 int nsNr = ctxt->nsNr;
9559
9560 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9561 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9562 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9563 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9564 xmlParserMaxDepth);
9565 ctxt->instate = XML_PARSER_EOF;
9566 return;
9567 }
9568
9569 /* Capture start position */
9570 if (ctxt->record_info) {
9571 node_info.begin_pos = ctxt->input->consumed +
9572 (CUR_PTR - ctxt->input->base);
9573 node_info.begin_line = ctxt->input->line;
9574 }
9575
9576 if (ctxt->spaceNr == 0)
9577 spacePush(ctxt, -1);
9578 else if (*ctxt->space == -2)
9579 spacePush(ctxt, -1);
9580 else
9581 spacePush(ctxt, *ctxt->space);
9582
9583 line = ctxt->input->line;
9584 #ifdef LIBXML_SAX1_ENABLED
9585 if (ctxt->sax2)
9586 #endif /* LIBXML_SAX1_ENABLED */
9587 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9588 #ifdef LIBXML_SAX1_ENABLED
9589 else
9590 name = xmlParseStartTag(ctxt);
9591 #endif /* LIBXML_SAX1_ENABLED */
9592 if (name == NULL) {
9593 spacePop(ctxt);
9594 return;
9595 }
9596 namePush(ctxt, name);
9597 ret = ctxt->node;
9598
9599 #ifdef LIBXML_VALID_ENABLED
9600 /*
9601 * [ VC: Root Element Type ]
9602 * The Name in the document type declaration must match the element
9603 * type of the root element.
9604 */
9605 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9606 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9607 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9608 #endif /* LIBXML_VALID_ENABLED */
9609
9610 /*
9611 * Check for an Empty Element.
9612 */
9613 if ((RAW == '/') && (NXT(1) == '>')) {
9614 SKIP(2);
9615 if (ctxt->sax2) {
9616 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9617 (!ctxt->disableSAX))
9618 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9619 #ifdef LIBXML_SAX1_ENABLED
9620 } else {
9621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9622 (!ctxt->disableSAX))
9623 ctxt->sax->endElement(ctxt->userData, name);
9624 #endif /* LIBXML_SAX1_ENABLED */
9625 }
9626 namePop(ctxt);
9627 spacePop(ctxt);
9628 if (nsNr != ctxt->nsNr)
9629 nsPop(ctxt, ctxt->nsNr - nsNr);
9630 if ( ret != NULL && ctxt->record_info ) {
9631 node_info.end_pos = ctxt->input->consumed +
9632 (CUR_PTR - ctxt->input->base);
9633 node_info.end_line = ctxt->input->line;
9634 node_info.node = ret;
9635 xmlParserAddNodeInfo(ctxt, &node_info);
9636 }
9637 return;
9638 }
9639 if (RAW == '>') {
9640 NEXT1;
9641 } else {
9642 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9643 "Couldn't find end of Start Tag %s line %d\n",
9644 name, line, NULL);
9645
9646 /*
9647 * end of parsing of this node.
9648 */
9649 nodePop(ctxt);
9650 namePop(ctxt);
9651 spacePop(ctxt);
9652 if (nsNr != ctxt->nsNr)
9653 nsPop(ctxt, ctxt->nsNr - nsNr);
9654
9655 /*
9656 * Capture end position and add node
9657 */
9658 if ( ret != NULL && ctxt->record_info ) {
9659 node_info.end_pos = ctxt->input->consumed +
9660 (CUR_PTR - ctxt->input->base);
9661 node_info.end_line = ctxt->input->line;
9662 node_info.node = ret;
9663 xmlParserAddNodeInfo(ctxt, &node_info);
9664 }
9665 return;
9666 }
9667
9668 /*
9669 * Parse the content of the element:
9670 */
9671 xmlParseContent(ctxt);
9672 if (!IS_BYTE_CHAR(RAW)) {
9673 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9674 "Premature end of data in tag %s line %d\n",
9675 name, line, NULL);
9676
9677 /*
9678 * end of parsing of this node.
9679 */
9680 nodePop(ctxt);
9681 namePop(ctxt);
9682 spacePop(ctxt);
9683 if (nsNr != ctxt->nsNr)
9684 nsPop(ctxt, ctxt->nsNr - nsNr);
9685 return;
9686 }
9687
9688 /*
9689 * parse the end of tag: '</' should be here.
9690 */
9691 if (ctxt->sax2) {
9692 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9693 namePop(ctxt);
9694 }
9695 #ifdef LIBXML_SAX1_ENABLED
9696 else
9697 xmlParseEndTag1(ctxt, line);
9698 #endif /* LIBXML_SAX1_ENABLED */
9699
9700 /*
9701 * Capture end position and add node
9702 */
9703 if ( ret != NULL && ctxt->record_info ) {
9704 node_info.end_pos = ctxt->input->consumed +
9705 (CUR_PTR - ctxt->input->base);
9706 node_info.end_line = ctxt->input->line;
9707 node_info.node = ret;
9708 xmlParserAddNodeInfo(ctxt, &node_info);
9709 }
9710 }
9711
9712 /**
9713 * xmlParseVersionNum:
9714 * @ctxt: an XML parser context
9715 *
9716 * parse the XML version value.
9717 *
9718 * [26] VersionNum ::= '1.' [0-9]+
9719 *
9720 * In practice allow [0-9].[0-9]+ at that level
9721 *
9722 * Returns the string giving the XML version number, or NULL
9723 */
9724 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9725 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9726 xmlChar *buf = NULL;
9727 int len = 0;
9728 int size = 10;
9729 xmlChar cur;
9730
9731 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9732 if (buf == NULL) {
9733 xmlErrMemory(ctxt, NULL);
9734 return(NULL);
9735 }
9736 cur = CUR;
9737 if (!((cur >= '0') && (cur <= '9'))) {
9738 xmlFree(buf);
9739 return(NULL);
9740 }
9741 buf[len++] = cur;
9742 NEXT;
9743 cur=CUR;
9744 if (cur != '.') {
9745 xmlFree(buf);
9746 return(NULL);
9747 }
9748 buf[len++] = cur;
9749 NEXT;
9750 cur=CUR;
9751 while ((cur >= '0') && (cur <= '9')) {
9752 if (len + 1 >= size) {
9753 xmlChar *tmp;
9754
9755 size *= 2;
9756 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9757 if (tmp == NULL) {
9758 xmlFree(buf);
9759 xmlErrMemory(ctxt, NULL);
9760 return(NULL);
9761 }
9762 buf = tmp;
9763 }
9764 buf[len++] = cur;
9765 NEXT;
9766 cur=CUR;
9767 }
9768 buf[len] = 0;
9769 return(buf);
9770 }
9771
9772 /**
9773 * xmlParseVersionInfo:
9774 * @ctxt: an XML parser context
9775 *
9776 * parse the XML version.
9777 *
9778 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9779 *
9780 * [25] Eq ::= S? '=' S?
9781 *
9782 * Returns the version string, e.g. "1.0"
9783 */
9784
9785 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9786 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9787 xmlChar *version = NULL;
9788
9789 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9790 SKIP(7);
9791 SKIP_BLANKS;
9792 if (RAW != '=') {
9793 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9794 return(NULL);
9795 }
9796 NEXT;
9797 SKIP_BLANKS;
9798 if (RAW == '"') {
9799 NEXT;
9800 version = xmlParseVersionNum(ctxt);
9801 if (RAW != '"') {
9802 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9803 } else
9804 NEXT;
9805 } else if (RAW == '\''){
9806 NEXT;
9807 version = xmlParseVersionNum(ctxt);
9808 if (RAW != '\'') {
9809 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9810 } else
9811 NEXT;
9812 } else {
9813 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9814 }
9815 }
9816 return(version);
9817 }
9818
9819 /**
9820 * xmlParseEncName:
9821 * @ctxt: an XML parser context
9822 *
9823 * parse the XML encoding name
9824 *
9825 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9826 *
9827 * Returns the encoding name value or NULL
9828 */
9829 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9830 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9831 xmlChar *buf = NULL;
9832 int len = 0;
9833 int size = 10;
9834 xmlChar cur;
9835
9836 cur = CUR;
9837 if (((cur >= 'a') && (cur <= 'z')) ||
9838 ((cur >= 'A') && (cur <= 'Z'))) {
9839 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9840 if (buf == NULL) {
9841 xmlErrMemory(ctxt, NULL);
9842 return(NULL);
9843 }
9844
9845 buf[len++] = cur;
9846 NEXT;
9847 cur = CUR;
9848 while (((cur >= 'a') && (cur <= 'z')) ||
9849 ((cur >= 'A') && (cur <= 'Z')) ||
9850 ((cur >= '0') && (cur <= '9')) ||
9851 (cur == '.') || (cur == '_') ||
9852 (cur == '-')) {
9853 if (len + 1 >= size) {
9854 xmlChar *tmp;
9855
9856 size *= 2;
9857 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9858 if (tmp == NULL) {
9859 xmlErrMemory(ctxt, NULL);
9860 xmlFree(buf);
9861 return(NULL);
9862 }
9863 buf = tmp;
9864 }
9865 buf[len++] = cur;
9866 NEXT;
9867 cur = CUR;
9868 if (cur == 0) {
9869 SHRINK;
9870 GROW;
9871 cur = CUR;
9872 }
9873 }
9874 buf[len] = 0;
9875 } else {
9876 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9877 }
9878 return(buf);
9879 }
9880
9881 /**
9882 * xmlParseEncodingDecl:
9883 * @ctxt: an XML parser context
9884 *
9885 * parse the XML encoding declaration
9886 *
9887 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9888 *
9889 * this setups the conversion filters.
9890 *
9891 * Returns the encoding value or NULL
9892 */
9893
9894 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9895 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9896 xmlChar *encoding = NULL;
9897
9898 SKIP_BLANKS;
9899 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9900 SKIP(8);
9901 SKIP_BLANKS;
9902 if (RAW != '=') {
9903 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9904 return(NULL);
9905 }
9906 NEXT;
9907 SKIP_BLANKS;
9908 if (RAW == '"') {
9909 NEXT;
9910 encoding = xmlParseEncName(ctxt);
9911 if (RAW != '"') {
9912 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9913 } else
9914 NEXT;
9915 } else if (RAW == '\''){
9916 NEXT;
9917 encoding = xmlParseEncName(ctxt);
9918 if (RAW != '\'') {
9919 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9920 } else
9921 NEXT;
9922 } else {
9923 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9924 }
9925 /*
9926 * UTF-16 encoding stwich has already taken place at this stage,
9927 * more over the little-endian/big-endian selection is already done
9928 */
9929 if ((encoding != NULL) &&
9930 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9931 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9932 /*
9933 * If no encoding was passed to the parser, that we are
9934 * using UTF-16 and no decoder is present i.e. the
9935 * document is apparently UTF-8 compatible, then raise an
9936 * encoding mismatch fatal error
9937 */
9938 if ((ctxt->encoding == NULL) &&
9939 (ctxt->input->buf != NULL) &&
9940 (ctxt->input->buf->encoder == NULL)) {
9941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9942 "Document labelled UTF-16 but has UTF-8 content\n");
9943 }
9944 if (ctxt->encoding != NULL)
9945 xmlFree((xmlChar *) ctxt->encoding);
9946 ctxt->encoding = encoding;
9947 }
9948 /*
9949 * UTF-8 encoding is handled natively
9950 */
9951 else if ((encoding != NULL) &&
9952 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9953 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9954 if (ctxt->encoding != NULL)
9955 xmlFree((xmlChar *) ctxt->encoding);
9956 ctxt->encoding = encoding;
9957 }
9958 else if (encoding != NULL) {
9959 xmlCharEncodingHandlerPtr handler;
9960
9961 if (ctxt->input->encoding != NULL)
9962 xmlFree((xmlChar *) ctxt->input->encoding);
9963 ctxt->input->encoding = encoding;
9964
9965 handler = xmlFindCharEncodingHandler((const char *) encoding);
9966 if (handler != NULL) {
9967 xmlSwitchToEncoding(ctxt, handler);
9968 } else {
9969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9970 "Unsupported encoding %s\n", encoding);
9971 return(NULL);
9972 }
9973 }
9974 }
9975 return(encoding);
9976 }
9977
9978 /**
9979 * xmlParseSDDecl:
9980 * @ctxt: an XML parser context
9981 *
9982 * parse the XML standalone declaration
9983 *
9984 * [32] SDDecl ::= S 'standalone' Eq
9985 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9986 *
9987 * [ VC: Standalone Document Declaration ]
9988 * TODO The standalone document declaration must have the value "no"
9989 * if any external markup declarations contain declarations of:
9990 * - attributes with default values, if elements to which these
9991 * attributes apply appear in the document without specifications
9992 * of values for these attributes, or
9993 * - entities (other than amp, lt, gt, apos, quot), if references
9994 * to those entities appear in the document, or
9995 * - attributes with values subject to normalization, where the
9996 * attribute appears in the document with a value which will change
9997 * as a result of normalization, or
9998 * - element types with element content, if white space occurs directly
9999 * within any instance of those types.
10000 *
10001 * Returns:
10002 * 1 if standalone="yes"
10003 * 0 if standalone="no"
10004 * -2 if standalone attribute is missing or invalid
10005 * (A standalone value of -2 means that the XML declaration was found,
10006 * but no value was specified for the standalone attribute).
10007 */
10008
10009 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10010 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10011 int standalone = -2;
10012
10013 SKIP_BLANKS;
10014 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10015 SKIP(10);
10016 SKIP_BLANKS;
10017 if (RAW != '=') {
10018 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10019 return(standalone);
10020 }
10021 NEXT;
10022 SKIP_BLANKS;
10023 if (RAW == '\''){
10024 NEXT;
10025 if ((RAW == 'n') && (NXT(1) == 'o')) {
10026 standalone = 0;
10027 SKIP(2);
10028 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10029 (NXT(2) == 's')) {
10030 standalone = 1;
10031 SKIP(3);
10032 } else {
10033 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10034 }
10035 if (RAW != '\'') {
10036 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10037 } else
10038 NEXT;
10039 } else if (RAW == '"'){
10040 NEXT;
10041 if ((RAW == 'n') && (NXT(1) == 'o')) {
10042 standalone = 0;
10043 SKIP(2);
10044 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10045 (NXT(2) == 's')) {
10046 standalone = 1;
10047 SKIP(3);
10048 } else {
10049 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10050 }
10051 if (RAW != '"') {
10052 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10053 } else
10054 NEXT;
10055 } else {
10056 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10057 }
10058 }
10059 return(standalone);
10060 }
10061
10062 /**
10063 * xmlParseXMLDecl:
10064 * @ctxt: an XML parser context
10065 *
10066 * parse an XML declaration header
10067 *
10068 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10069 */
10070
10071 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10072 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10073 xmlChar *version;
10074
10075 /*
10076 * This value for standalone indicates that the document has an
10077 * XML declaration but it does not have a standalone attribute.
10078 * It will be overwritten later if a standalone attribute is found.
10079 */
10080 ctxt->input->standalone = -2;
10081
10082 /*
10083 * We know that '<?xml' is here.
10084 */
10085 SKIP(5);
10086
10087 if (!IS_BLANK_CH(RAW)) {
10088 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10089 "Blank needed after '<?xml'\n");
10090 }
10091 SKIP_BLANKS;
10092
10093 /*
10094 * We must have the VersionInfo here.
10095 */
10096 version = xmlParseVersionInfo(ctxt);
10097 if (version == NULL) {
10098 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10099 } else {
10100 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10101 /*
10102 * Changed here for XML-1.0 5th edition
10103 */
10104 if (ctxt->options & XML_PARSE_OLD10) {
10105 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10106 "Unsupported version '%s'\n",
10107 version);
10108 } else {
10109 if ((version[0] == '1') && ((version[1] == '.'))) {
10110 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10111 "Unsupported version '%s'\n",
10112 version, NULL);
10113 } else {
10114 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10115 "Unsupported version '%s'\n",
10116 version);
10117 }
10118 }
10119 }
10120 if (ctxt->version != NULL)
10121 xmlFree((void *) ctxt->version);
10122 ctxt->version = version;
10123 }
10124
10125 /*
10126 * We may have the encoding declaration
10127 */
10128 if (!IS_BLANK_CH(RAW)) {
10129 if ((RAW == '?') && (NXT(1) == '>')) {
10130 SKIP(2);
10131 return;
10132 }
10133 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10134 }
10135 xmlParseEncodingDecl(ctxt);
10136 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10137 /*
10138 * The XML REC instructs us to stop parsing right here
10139 */
10140 return;
10141 }
10142
10143 /*
10144 * We may have the standalone status.
10145 */
10146 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10147 if ((RAW == '?') && (NXT(1) == '>')) {
10148 SKIP(2);
10149 return;
10150 }
10151 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10152 }
10153
10154 /*
10155 * We can grow the input buffer freely at that point
10156 */
10157 GROW;
10158
10159 SKIP_BLANKS;
10160 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10161
10162 SKIP_BLANKS;
10163 if ((RAW == '?') && (NXT(1) == '>')) {
10164 SKIP(2);
10165 } else if (RAW == '>') {
10166 /* Deprecated old WD ... */
10167 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10168 NEXT;
10169 } else {
10170 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10171 MOVETO_ENDTAG(CUR_PTR);
10172 NEXT;
10173 }
10174 }
10175
10176 /**
10177 * xmlParseMisc:
10178 * @ctxt: an XML parser context
10179 *
10180 * parse an XML Misc* optional field.
10181 *
10182 * [27] Misc ::= Comment | PI | S
10183 */
10184
10185 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10186 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10187 while (((RAW == '<') && (NXT(1) == '?')) ||
10188 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10189 IS_BLANK_CH(CUR)) {
10190 if ((RAW == '<') && (NXT(1) == '?')) {
10191 xmlParsePI(ctxt);
10192 } else if (IS_BLANK_CH(CUR)) {
10193 NEXT;
10194 } else
10195 xmlParseComment(ctxt);
10196 }
10197 }
10198
10199 /**
10200 * xmlParseDocument:
10201 * @ctxt: an XML parser context
10202 *
10203 * parse an XML document (and build a tree if using the standard SAX
10204 * interface).
10205 *
10206 * [1] document ::= prolog element Misc*
10207 *
10208 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10209 *
10210 * Returns 0, -1 in case of error. the parser context is augmented
10211 * as a result of the parsing.
10212 */
10213
10214 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10215 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10216 xmlChar start[4];
10217 xmlCharEncoding enc;
10218
10219 xmlInitParser();
10220
10221 if ((ctxt == NULL) || (ctxt->input == NULL))
10222 return(-1);
10223
10224 GROW;
10225
10226 /*
10227 * SAX: detecting the level.
10228 */
10229 xmlDetectSAX2(ctxt);
10230
10231 /*
10232 * SAX: beginning of the document processing.
10233 */
10234 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10235 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10236
10237 if ((ctxt->encoding == NULL) &&
10238 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10239 /*
10240 * Get the 4 first bytes and decode the charset
10241 * if enc != XML_CHAR_ENCODING_NONE
10242 * plug some encoding conversion routines.
10243 */
10244 start[0] = RAW;
10245 start[1] = NXT(1);
10246 start[2] = NXT(2);
10247 start[3] = NXT(3);
10248 enc = xmlDetectCharEncoding(&start[0], 4);
10249 if (enc != XML_CHAR_ENCODING_NONE) {
10250 xmlSwitchEncoding(ctxt, enc);
10251 }
10252 }
10253
10254
10255 if (CUR == 0) {
10256 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10257 }
10258
10259 /*
10260 * Check for the XMLDecl in the Prolog.
10261 * do not GROW here to avoid the detected encoder to decode more
10262 * than just the first line, unless the amount of data is really
10263 * too small to hold "<?xml version="1.0" encoding="foo"
10264 */
10265 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10266 GROW;
10267 }
10268 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10269
10270 /*
10271 * Note that we will switch encoding on the fly.
10272 */
10273 xmlParseXMLDecl(ctxt);
10274 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10275 /*
10276 * The XML REC instructs us to stop parsing right here
10277 */
10278 return(-1);
10279 }
10280 ctxt->standalone = ctxt->input->standalone;
10281 SKIP_BLANKS;
10282 } else {
10283 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10284 }
10285 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10286 ctxt->sax->startDocument(ctxt->userData);
10287
10288 /*
10289 * The Misc part of the Prolog
10290 */
10291 GROW;
10292 xmlParseMisc(ctxt);
10293
10294 /*
10295 * Then possibly doc type declaration(s) and more Misc
10296 * (doctypedecl Misc*)?
10297 */
10298 GROW;
10299 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10300
10301 ctxt->inSubset = 1;
10302 xmlParseDocTypeDecl(ctxt);
10303 if (RAW == '[') {
10304 ctxt->instate = XML_PARSER_DTD;
10305 xmlParseInternalSubset(ctxt);
10306 }
10307
10308 /*
10309 * Create and update the external subset.
10310 */
10311 ctxt->inSubset = 2;
10312 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10313 (!ctxt->disableSAX))
10314 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10315 ctxt->extSubSystem, ctxt->extSubURI);
10316 ctxt->inSubset = 0;
10317
10318 xmlCleanSpecialAttr(ctxt);
10319
10320 ctxt->instate = XML_PARSER_PROLOG;
10321 xmlParseMisc(ctxt);
10322 }
10323
10324 /*
10325 * Time to start parsing the tree itself
10326 */
10327 GROW;
10328 if (RAW != '<') {
10329 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10330 "Start tag expected, '<' not found\n");
10331 } else {
10332 ctxt->instate = XML_PARSER_CONTENT;
10333 xmlParseElement(ctxt);
10334 ctxt->instate = XML_PARSER_EPILOG;
10335
10336
10337 /*
10338 * The Misc part at the end
10339 */
10340 xmlParseMisc(ctxt);
10341
10342 if (RAW != 0) {
10343 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10344 }
10345 ctxt->instate = XML_PARSER_EOF;
10346 }
10347
10348 /*
10349 * SAX: end of the document processing.
10350 */
10351 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10352 ctxt->sax->endDocument(ctxt->userData);
10353
10354 /*
10355 * Remove locally kept entity definitions if the tree was not built
10356 */
10357 if ((ctxt->myDoc != NULL) &&
10358 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10359 xmlFreeDoc(ctxt->myDoc);
10360 ctxt->myDoc = NULL;
10361 }
10362
10363 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10364 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10365 if (ctxt->valid)
10366 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10367 if (ctxt->nsWellFormed)
10368 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10369 if (ctxt->options & XML_PARSE_OLD10)
10370 ctxt->myDoc->properties |= XML_DOC_OLD10;
10371 }
10372 if (! ctxt->wellFormed) {
10373 ctxt->valid = 0;
10374 return(-1);
10375 }
10376 return(0);
10377 }
10378
10379 /**
10380 * xmlParseExtParsedEnt:
10381 * @ctxt: an XML parser context
10382 *
10383 * parse a general parsed entity
10384 * An external general parsed entity is well-formed if it matches the
10385 * production labeled extParsedEnt.
10386 *
10387 * [78] extParsedEnt ::= TextDecl? content
10388 *
10389 * Returns 0, -1 in case of error. the parser context is augmented
10390 * as a result of the parsing.
10391 */
10392
10393 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10394 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10395 xmlChar start[4];
10396 xmlCharEncoding enc;
10397
10398 if ((ctxt == NULL) || (ctxt->input == NULL))
10399 return(-1);
10400
10401 xmlDefaultSAXHandlerInit();
10402
10403 xmlDetectSAX2(ctxt);
10404
10405 GROW;
10406
10407 /*
10408 * SAX: beginning of the document processing.
10409 */
10410 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10411 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10412
10413 /*
10414 * Get the 4 first bytes and decode the charset
10415 * if enc != XML_CHAR_ENCODING_NONE
10416 * plug some encoding conversion routines.
10417 */
10418 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10419 start[0] = RAW;
10420 start[1] = NXT(1);
10421 start[2] = NXT(2);
10422 start[3] = NXT(3);
10423 enc = xmlDetectCharEncoding(start, 4);
10424 if (enc != XML_CHAR_ENCODING_NONE) {
10425 xmlSwitchEncoding(ctxt, enc);
10426 }
10427 }
10428
10429
10430 if (CUR == 0) {
10431 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10432 }
10433
10434 /*
10435 * Check for the XMLDecl in the Prolog.
10436 */
10437 GROW;
10438 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10439
10440 /*
10441 * Note that we will switch encoding on the fly.
10442 */
10443 xmlParseXMLDecl(ctxt);
10444 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10445 /*
10446 * The XML REC instructs us to stop parsing right here
10447 */
10448 return(-1);
10449 }
10450 SKIP_BLANKS;
10451 } else {
10452 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10453 }
10454 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10455 ctxt->sax->startDocument(ctxt->userData);
10456
10457 /*
10458 * Doing validity checking on chunk doesn't make sense
10459 */
10460 ctxt->instate = XML_PARSER_CONTENT;
10461 ctxt->validate = 0;
10462 ctxt->loadsubset = 0;
10463 ctxt->depth = 0;
10464
10465 xmlParseContent(ctxt);
10466
10467 if ((RAW == '<') && (NXT(1) == '/')) {
10468 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10469 } else if (RAW != 0) {
10470 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10471 }
10472
10473 /*
10474 * SAX: end of the document processing.
10475 */
10476 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10477 ctxt->sax->endDocument(ctxt->userData);
10478
10479 if (! ctxt->wellFormed) return(-1);
10480 return(0);
10481 }
10482
10483 #ifdef LIBXML_PUSH_ENABLED
10484 /************************************************************************
10485 * *
10486 * Progressive parsing interfaces *
10487 * *
10488 ************************************************************************/
10489
10490 /**
10491 * xmlParseLookupSequence:
10492 * @ctxt: an XML parser context
10493 * @first: the first char to lookup
10494 * @next: the next char to lookup or zero
10495 * @third: the next char to lookup or zero
10496 *
10497 * Try to find if a sequence (first, next, third) or just (first next) or
10498 * (first) is available in the input stream.
10499 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10500 * to avoid rescanning sequences of bytes, it DOES change the state of the
10501 * parser, do not use liberally.
10502 *
10503 * Returns the index to the current parsing point if the full sequence
10504 * is available, -1 otherwise.
10505 */
10506 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10507 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10508 xmlChar next, xmlChar third) {
10509 int base, len;
10510 xmlParserInputPtr in;
10511 const xmlChar *buf;
10512
10513 in = ctxt->input;
10514 if (in == NULL) return(-1);
10515 base = in->cur - in->base;
10516 if (base < 0) return(-1);
10517 if (ctxt->checkIndex > base)
10518 base = ctxt->checkIndex;
10519 if (in->buf == NULL) {
10520 buf = in->base;
10521 len = in->length;
10522 } else {
10523 buf = in->buf->buffer->content;
10524 len = in->buf->buffer->use;
10525 }
10526 /* take into account the sequence length */
10527 if (third) len -= 2;
10528 else if (next) len --;
10529 for (;base < len;base++) {
10530 if (buf[base] == first) {
10531 if (third != 0) {
10532 if ((buf[base + 1] != next) ||
10533 (buf[base + 2] != third)) continue;
10534 } else if (next != 0) {
10535 if (buf[base + 1] != next) continue;
10536 }
10537 ctxt->checkIndex = 0;
10538 #ifdef DEBUG_PUSH
10539 if (next == 0)
10540 xmlGenericError(xmlGenericErrorContext,
10541 "PP: lookup '%c' found at %d\n",
10542 first, base);
10543 else if (third == 0)
10544 xmlGenericError(xmlGenericErrorContext,
10545 "PP: lookup '%c%c' found at %d\n",
10546 first, next, base);
10547 else
10548 xmlGenericError(xmlGenericErrorContext,
10549 "PP: lookup '%c%c%c' found at %d\n",
10550 first, next, third, base);
10551 #endif
10552 return(base - (in->cur - in->base));
10553 }
10554 }
10555 ctxt->checkIndex = base;
10556 #ifdef DEBUG_PUSH
10557 if (next == 0)
10558 xmlGenericError(xmlGenericErrorContext,
10559 "PP: lookup '%c' failed\n", first);
10560 else if (third == 0)
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: lookup '%c%c' failed\n", first, next);
10563 else
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: lookup '%c%c%c' failed\n", first, next, third);
10566 #endif
10567 return(-1);
10568 }
10569
10570 /**
10571 * xmlParseGetLasts:
10572 * @ctxt: an XML parser context
10573 * @lastlt: pointer to store the last '<' from the input
10574 * @lastgt: pointer to store the last '>' from the input
10575 *
10576 * Lookup the last < and > in the current chunk
10577 */
10578 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10579 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10580 const xmlChar **lastgt) {
10581 const xmlChar *tmp;
10582
10583 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10584 xmlGenericError(xmlGenericErrorContext,
10585 "Internal error: xmlParseGetLasts\n");
10586 return;
10587 }
10588 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10589 tmp = ctxt->input->end;
10590 tmp--;
10591 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10592 if (tmp < ctxt->input->base) {
10593 *lastlt = NULL;
10594 *lastgt = NULL;
10595 } else {
10596 *lastlt = tmp;
10597 tmp++;
10598 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10599 if (*tmp == '\'') {
10600 tmp++;
10601 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10602 if (tmp < ctxt->input->end) tmp++;
10603 } else if (*tmp == '"') {
10604 tmp++;
10605 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10606 if (tmp < ctxt->input->end) tmp++;
10607 } else
10608 tmp++;
10609 }
10610 if (tmp < ctxt->input->end)
10611 *lastgt = tmp;
10612 else {
10613 tmp = *lastlt;
10614 tmp--;
10615 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10616 if (tmp >= ctxt->input->base)
10617 *lastgt = tmp;
10618 else
10619 *lastgt = NULL;
10620 }
10621 }
10622 } else {
10623 *lastlt = NULL;
10624 *lastgt = NULL;
10625 }
10626 }
10627 /**
10628 * xmlCheckCdataPush:
10629 * @cur: pointer to the bock of characters
10630 * @len: length of the block in bytes
10631 *
10632 * Check that the block of characters is okay as SCdata content [20]
10633 *
10634 * Returns the number of bytes to pass if okay, a negative index where an
10635 * UTF-8 error occured otherwise
10636 */
10637 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10638 xmlCheckCdataPush(const xmlChar *utf, int len) {
10639 int ix;
10640 unsigned char c;
10641 int codepoint;
10642
10643 if ((utf == NULL) || (len <= 0))
10644 return(0);
10645
10646 for (ix = 0; ix < len;) { /* string is 0-terminated */
10647 c = utf[ix];
10648 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10649 if (c >= 0x20)
10650 ix++;
10651 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10652 ix++;
10653 else
10654 return(-ix);
10655 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10656 if (ix + 2 > len) return(ix);
10657 if ((utf[ix+1] & 0xc0 ) != 0x80)
10658 return(-ix);
10659 codepoint = (utf[ix] & 0x1f) << 6;
10660 codepoint |= utf[ix+1] & 0x3f;
10661 if (!xmlIsCharQ(codepoint))
10662 return(-ix);
10663 ix += 2;
10664 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10665 if (ix + 3 > len) return(ix);
10666 if (((utf[ix+1] & 0xc0) != 0x80) ||
10667 ((utf[ix+2] & 0xc0) != 0x80))
10668 return(-ix);
10669 codepoint = (utf[ix] & 0xf) << 12;
10670 codepoint |= (utf[ix+1] & 0x3f) << 6;
10671 codepoint |= utf[ix+2] & 0x3f;
10672 if (!xmlIsCharQ(codepoint))
10673 return(-ix);
10674 ix += 3;
10675 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10676 if (ix + 4 > len) return(ix);
10677 if (((utf[ix+1] & 0xc0) != 0x80) ||
10678 ((utf[ix+2] & 0xc0) != 0x80) ||
10679 ((utf[ix+3] & 0xc0) != 0x80))
10680 return(-ix);
10681 codepoint = (utf[ix] & 0x7) << 18;
10682 codepoint |= (utf[ix+1] & 0x3f) << 12;
10683 codepoint |= (utf[ix+2] & 0x3f) << 6;
10684 codepoint |= utf[ix+3] & 0x3f;
10685 if (!xmlIsCharQ(codepoint))
10686 return(-ix);
10687 ix += 4;
10688 } else /* unknown encoding */
10689 return(-ix);
10690 }
10691 return(ix);
10692 }
10693
10694 /**
10695 * xmlParseTryOrFinish:
10696 * @ctxt: an XML parser context
10697 * @terminate: last chunk indicator
10698 *
10699 * Try to progress on parsing
10700 *
10701 * Returns zero if no parsing was possible
10702 */
10703 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10704 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10705 int ret = 0;
10706 int avail, tlen;
10707 xmlChar cur, next;
10708 const xmlChar *lastlt, *lastgt;
10709
10710 if (ctxt->input == NULL)
10711 return(0);
10712
10713 #ifdef DEBUG_PUSH
10714 switch (ctxt->instate) {
10715 case XML_PARSER_EOF:
10716 xmlGenericError(xmlGenericErrorContext,
10717 "PP: try EOF\n"); break;
10718 case XML_PARSER_START:
10719 xmlGenericError(xmlGenericErrorContext,
10720 "PP: try START\n"); break;
10721 case XML_PARSER_MISC:
10722 xmlGenericError(xmlGenericErrorContext,
10723 "PP: try MISC\n");break;
10724 case XML_PARSER_COMMENT:
10725 xmlGenericError(xmlGenericErrorContext,
10726 "PP: try COMMENT\n");break;
10727 case XML_PARSER_PROLOG:
10728 xmlGenericError(xmlGenericErrorContext,
10729 "PP: try PROLOG\n");break;
10730 case XML_PARSER_START_TAG:
10731 xmlGenericError(xmlGenericErrorContext,
10732 "PP: try START_TAG\n");break;
10733 case XML_PARSER_CONTENT:
10734 xmlGenericError(xmlGenericErrorContext,
10735 "PP: try CONTENT\n");break;
10736 case XML_PARSER_CDATA_SECTION:
10737 xmlGenericError(xmlGenericErrorContext,
10738 "PP: try CDATA_SECTION\n");break;
10739 case XML_PARSER_END_TAG:
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: try END_TAG\n");break;
10742 case XML_PARSER_ENTITY_DECL:
10743 xmlGenericError(xmlGenericErrorContext,
10744 "PP: try ENTITY_DECL\n");break;
10745 case XML_PARSER_ENTITY_VALUE:
10746 xmlGenericError(xmlGenericErrorContext,
10747 "PP: try ENTITY_VALUE\n");break;
10748 case XML_PARSER_ATTRIBUTE_VALUE:
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: try ATTRIBUTE_VALUE\n");break;
10751 case XML_PARSER_DTD:
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: try DTD\n");break;
10754 case XML_PARSER_EPILOG:
10755 xmlGenericError(xmlGenericErrorContext,
10756 "PP: try EPILOG\n");break;
10757 case XML_PARSER_PI:
10758 xmlGenericError(xmlGenericErrorContext,
10759 "PP: try PI\n");break;
10760 case XML_PARSER_IGNORE:
10761 xmlGenericError(xmlGenericErrorContext,
10762 "PP: try IGNORE\n");break;
10763 }
10764 #endif
10765
10766 if ((ctxt->input != NULL) &&
10767 (ctxt->input->cur - ctxt->input->base > 4096)) {
10768 xmlSHRINK(ctxt);
10769 ctxt->checkIndex = 0;
10770 }
10771 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10772
10773 while (1) {
10774 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10775 return(0);
10776
10777
10778 /*
10779 * Pop-up of finished entities.
10780 */
10781 while ((RAW == 0) && (ctxt->inputNr > 1))
10782 xmlPopInput(ctxt);
10783
10784 if (ctxt->input == NULL) break;
10785 if (ctxt->input->buf == NULL)
10786 avail = ctxt->input->length -
10787 (ctxt->input->cur - ctxt->input->base);
10788 else {
10789 /*
10790 * If we are operating on converted input, try to flush
10791 * remainng chars to avoid them stalling in the non-converted
10792 * buffer.
10793 */
10794 if ((ctxt->input->buf->raw != NULL) &&
10795 (ctxt->input->buf->raw->use > 0)) {
10796 int base = ctxt->input->base -
10797 ctxt->input->buf->buffer->content;
10798 int current = ctxt->input->cur - ctxt->input->base;
10799
10800 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10801 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10802 ctxt->input->cur = ctxt->input->base + current;
10803 ctxt->input->end =
10804 &ctxt->input->buf->buffer->content[
10805 ctxt->input->buf->buffer->use];
10806 }
10807 avail = ctxt->input->buf->buffer->use -
10808 (ctxt->input->cur - ctxt->input->base);
10809 }
10810 if (avail < 1)
10811 goto done;
10812 switch (ctxt->instate) {
10813 case XML_PARSER_EOF:
10814 /*
10815 * Document parsing is done !
10816 */
10817 goto done;
10818 case XML_PARSER_START:
10819 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10820 xmlChar start[4];
10821 xmlCharEncoding enc;
10822
10823 /*
10824 * Very first chars read from the document flow.
10825 */
10826 if (avail < 4)
10827 goto done;
10828
10829 /*
10830 * Get the 4 first bytes and decode the charset
10831 * if enc != XML_CHAR_ENCODING_NONE
10832 * plug some encoding conversion routines,
10833 * else xmlSwitchEncoding will set to (default)
10834 * UTF8.
10835 */
10836 start[0] = RAW;
10837 start[1] = NXT(1);
10838 start[2] = NXT(2);
10839 start[3] = NXT(3);
10840 enc = xmlDetectCharEncoding(start, 4);
10841 xmlSwitchEncoding(ctxt, enc);
10842 break;
10843 }
10844
10845 if (avail < 2)
10846 goto done;
10847 cur = ctxt->input->cur[0];
10848 next = ctxt->input->cur[1];
10849 if (cur == 0) {
10850 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10851 ctxt->sax->setDocumentLocator(ctxt->userData,
10852 &xmlDefaultSAXLocator);
10853 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854 ctxt->instate = XML_PARSER_EOF;
10855 #ifdef DEBUG_PUSH
10856 xmlGenericError(xmlGenericErrorContext,
10857 "PP: entering EOF\n");
10858 #endif
10859 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10860 ctxt->sax->endDocument(ctxt->userData);
10861 goto done;
10862 }
10863 if ((cur == '<') && (next == '?')) {
10864 /* PI or XML decl */
10865 if (avail < 5) return(ret);
10866 if ((!terminate) &&
10867 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10868 return(ret);
10869 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10870 ctxt->sax->setDocumentLocator(ctxt->userData,
10871 &xmlDefaultSAXLocator);
10872 if ((ctxt->input->cur[2] == 'x') &&
10873 (ctxt->input->cur[3] == 'm') &&
10874 (ctxt->input->cur[4] == 'l') &&
10875 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10876 ret += 5;
10877 #ifdef DEBUG_PUSH
10878 xmlGenericError(xmlGenericErrorContext,
10879 "PP: Parsing XML Decl\n");
10880 #endif
10881 xmlParseXMLDecl(ctxt);
10882 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10883 /*
10884 * The XML REC instructs us to stop parsing right
10885 * here
10886 */
10887 ctxt->instate = XML_PARSER_EOF;
10888 return(0);
10889 }
10890 ctxt->standalone = ctxt->input->standalone;
10891 if ((ctxt->encoding == NULL) &&
10892 (ctxt->input->encoding != NULL))
10893 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10894 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10895 (!ctxt->disableSAX))
10896 ctxt->sax->startDocument(ctxt->userData);
10897 ctxt->instate = XML_PARSER_MISC;
10898 #ifdef DEBUG_PUSH
10899 xmlGenericError(xmlGenericErrorContext,
10900 "PP: entering MISC\n");
10901 #endif
10902 } else {
10903 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10904 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10905 (!ctxt->disableSAX))
10906 ctxt->sax->startDocument(ctxt->userData);
10907 ctxt->instate = XML_PARSER_MISC;
10908 #ifdef DEBUG_PUSH
10909 xmlGenericError(xmlGenericErrorContext,
10910 "PP: entering MISC\n");
10911 #endif
10912 }
10913 } else {
10914 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10915 ctxt->sax->setDocumentLocator(ctxt->userData,
10916 &xmlDefaultSAXLocator);
10917 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10918 if (ctxt->version == NULL) {
10919 xmlErrMemory(ctxt, NULL);
10920 break;
10921 }
10922 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10923 (!ctxt->disableSAX))
10924 ctxt->sax->startDocument(ctxt->userData);
10925 ctxt->instate = XML_PARSER_MISC;
10926 #ifdef DEBUG_PUSH
10927 xmlGenericError(xmlGenericErrorContext,
10928 "PP: entering MISC\n");
10929 #endif
10930 }
10931 break;
10932 case XML_PARSER_START_TAG: {
10933 const xmlChar *name;
10934 const xmlChar *prefix = NULL;
10935 const xmlChar *URI = NULL;
10936 int nsNr = ctxt->nsNr;
10937
10938 if ((avail < 2) && (ctxt->inputNr == 1))
10939 goto done;
10940 cur = ctxt->input->cur[0];
10941 if (cur != '<') {
10942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10943 ctxt->instate = XML_PARSER_EOF;
10944 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10945 ctxt->sax->endDocument(ctxt->userData);
10946 goto done;
10947 }
10948 if (!terminate) {
10949 if (ctxt->progressive) {
10950 /* > can be found unescaped in attribute values */
10951 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10952 goto done;
10953 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10954 goto done;
10955 }
10956 }
10957 if (ctxt->spaceNr == 0)
10958 spacePush(ctxt, -1);
10959 else if (*ctxt->space == -2)
10960 spacePush(ctxt, -1);
10961 else
10962 spacePush(ctxt, *ctxt->space);
10963 #ifdef LIBXML_SAX1_ENABLED
10964 if (ctxt->sax2)
10965 #endif /* LIBXML_SAX1_ENABLED */
10966 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10967 #ifdef LIBXML_SAX1_ENABLED
10968 else
10969 name = xmlParseStartTag(ctxt);
10970 #endif /* LIBXML_SAX1_ENABLED */
10971 if (name == NULL) {
10972 spacePop(ctxt);
10973 ctxt->instate = XML_PARSER_EOF;
10974 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10975 ctxt->sax->endDocument(ctxt->userData);
10976 goto done;
10977 }
10978 #ifdef LIBXML_VALID_ENABLED
10979 /*
10980 * [ VC: Root Element Type ]
10981 * The Name in the document type declaration must match
10982 * the element type of the root element.
10983 */
10984 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10985 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10986 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10987 #endif /* LIBXML_VALID_ENABLED */
10988
10989 /*
10990 * Check for an Empty Element.
10991 */
10992 if ((RAW == '/') && (NXT(1) == '>')) {
10993 SKIP(2);
10994
10995 if (ctxt->sax2) {
10996 if ((ctxt->sax != NULL) &&
10997 (ctxt->sax->endElementNs != NULL) &&
10998 (!ctxt->disableSAX))
10999 ctxt->sax->endElementNs(ctxt->userData, name,
11000 prefix, URI);
11001 if (ctxt->nsNr - nsNr > 0)
11002 nsPop(ctxt, ctxt->nsNr - nsNr);
11003 #ifdef LIBXML_SAX1_ENABLED
11004 } else {
11005 if ((ctxt->sax != NULL) &&
11006 (ctxt->sax->endElement != NULL) &&
11007 (!ctxt->disableSAX))
11008 ctxt->sax->endElement(ctxt->userData, name);
11009 #endif /* LIBXML_SAX1_ENABLED */
11010 }
11011 spacePop(ctxt);
11012 if (ctxt->nameNr == 0) {
11013 ctxt->instate = XML_PARSER_EPILOG;
11014 } else {
11015 ctxt->instate = XML_PARSER_CONTENT;
11016 }
11017 break;
11018 }
11019 if (RAW == '>') {
11020 NEXT;
11021 } else {
11022 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11023 "Couldn't find end of Start Tag %s\n",
11024 name);
11025 nodePop(ctxt);
11026 spacePop(ctxt);
11027 }
11028 if (ctxt->sax2)
11029 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11030 #ifdef LIBXML_SAX1_ENABLED
11031 else
11032 namePush(ctxt, name);
11033 #endif /* LIBXML_SAX1_ENABLED */
11034
11035 ctxt->instate = XML_PARSER_CONTENT;
11036 break;
11037 }
11038 case XML_PARSER_CONTENT: {
11039 const xmlChar *test;
11040 unsigned int cons;
11041 if ((avail < 2) && (ctxt->inputNr == 1))
11042 goto done;
11043 cur = ctxt->input->cur[0];
11044 next = ctxt->input->cur[1];
11045
11046 test = CUR_PTR;
11047 cons = ctxt->input->consumed;
11048 if ((cur == '<') && (next == '/')) {
11049 ctxt->instate = XML_PARSER_END_TAG;
11050 break;
11051 } else if ((cur == '<') && (next == '?')) {
11052 if ((!terminate) &&
11053 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11054 goto done;
11055 xmlParsePI(ctxt);
11056 } else if ((cur == '<') && (next != '!')) {
11057 ctxt->instate = XML_PARSER_START_TAG;
11058 break;
11059 } else if ((cur == '<') && (next == '!') &&
11060 (ctxt->input->cur[2] == '-') &&
11061 (ctxt->input->cur[3] == '-')) {
11062 int term;
11063
11064 if (avail < 4)
11065 goto done;
11066 ctxt->input->cur += 4;
11067 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11068 ctxt->input->cur -= 4;
11069 if ((!terminate) && (term < 0))
11070 goto done;
11071 xmlParseComment(ctxt);
11072 ctxt->instate = XML_PARSER_CONTENT;
11073 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11074 (ctxt->input->cur[2] == '[') &&
11075 (ctxt->input->cur[3] == 'C') &&
11076 (ctxt->input->cur[4] == 'D') &&
11077 (ctxt->input->cur[5] == 'A') &&
11078 (ctxt->input->cur[6] == 'T') &&
11079 (ctxt->input->cur[7] == 'A') &&
11080 (ctxt->input->cur[8] == '[')) {
11081 SKIP(9);
11082 ctxt->instate = XML_PARSER_CDATA_SECTION;
11083 break;
11084 } else if ((cur == '<') && (next == '!') &&
11085 (avail < 9)) {
11086 goto done;
11087 } else if (cur == '&') {
11088 if ((!terminate) &&
11089 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11090 goto done;
11091 xmlParseReference(ctxt);
11092 } else {
11093 /* TODO Avoid the extra copy, handle directly !!! */
11094 /*
11095 * Goal of the following test is:
11096 * - minimize calls to the SAX 'character' callback
11097 * when they are mergeable
11098 * - handle an problem for isBlank when we only parse
11099 * a sequence of blank chars and the next one is
11100 * not available to check against '<' presence.
11101 * - tries to homogenize the differences in SAX
11102 * callbacks between the push and pull versions
11103 * of the parser.
11104 */
11105 if ((ctxt->inputNr == 1) &&
11106 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11107 if (!terminate) {
11108 if (ctxt->progressive) {
11109 if ((lastlt == NULL) ||
11110 (ctxt->input->cur > lastlt))
11111 goto done;
11112 } else if (xmlParseLookupSequence(ctxt,
11113 '<', 0, 0) < 0) {
11114 goto done;
11115 }
11116 }
11117 }
11118 ctxt->checkIndex = 0;
11119 xmlParseCharData(ctxt, 0);
11120 }
11121 /*
11122 * Pop-up of finished entities.
11123 */
11124 while ((RAW == 0) && (ctxt->inputNr > 1))
11125 xmlPopInput(ctxt);
11126 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11127 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11128 "detected an error in element content\n");
11129 ctxt->instate = XML_PARSER_EOF;
11130 break;
11131 }
11132 break;
11133 }
11134 case XML_PARSER_END_TAG:
11135 if (avail < 2)
11136 goto done;
11137 if (!terminate) {
11138 if (ctxt->progressive) {
11139 /* > can be found unescaped in attribute values */
11140 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11141 goto done;
11142 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11143 goto done;
11144 }
11145 }
11146 if (ctxt->sax2) {
11147 xmlParseEndTag2(ctxt,
11148 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11149 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11150 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11151 nameNsPop(ctxt);
11152 }
11153 #ifdef LIBXML_SAX1_ENABLED
11154 else
11155 xmlParseEndTag1(ctxt, 0);
11156 #endif /* LIBXML_SAX1_ENABLED */
11157 if (ctxt->nameNr == 0) {
11158 ctxt->instate = XML_PARSER_EPILOG;
11159 } else {
11160 ctxt->instate = XML_PARSER_CONTENT;
11161 }
11162 break;
11163 case XML_PARSER_CDATA_SECTION: {
11164 /*
11165 * The Push mode need to have the SAX callback for
11166 * cdataBlock merge back contiguous callbacks.
11167 */
11168 int base;
11169
11170 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11171 if (base < 0) {
11172 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11173 int tmp;
11174
11175 tmp = xmlCheckCdataPush(ctxt->input->cur,
11176 XML_PARSER_BIG_BUFFER_SIZE);
11177 if (tmp < 0) {
11178 tmp = -tmp;
11179 ctxt->input->cur += tmp;
11180 goto encoding_error;
11181 }
11182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11183 if (ctxt->sax->cdataBlock != NULL)
11184 ctxt->sax->cdataBlock(ctxt->userData,
11185 ctxt->input->cur, tmp);
11186 else if (ctxt->sax->characters != NULL)
11187 ctxt->sax->characters(ctxt->userData,
11188 ctxt->input->cur, tmp);
11189 }
11190 SKIPL(tmp);
11191 ctxt->checkIndex = 0;
11192 }
11193 goto done;
11194 } else {
11195 int tmp;
11196
11197 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11198 if ((tmp < 0) || (tmp != base)) {
11199 tmp = -tmp;
11200 ctxt->input->cur += tmp;
11201 goto encoding_error;
11202 }
11203 if ((ctxt->sax != NULL) && (base == 0) &&
11204 (ctxt->sax->cdataBlock != NULL) &&
11205 (!ctxt->disableSAX)) {
11206 /*
11207 * Special case to provide identical behaviour
11208 * between pull and push parsers on enpty CDATA
11209 * sections
11210 */
11211 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11212 (!strncmp((const char *)&ctxt->input->cur[-9],
11213 "<![CDATA[", 9)))
11214 ctxt->sax->cdataBlock(ctxt->userData,
11215 BAD_CAST "", 0);
11216 } else if ((ctxt->sax != NULL) && (base > 0) &&
11217 (!ctxt->disableSAX)) {
11218 if (ctxt->sax->cdataBlock != NULL)
11219 ctxt->sax->cdataBlock(ctxt->userData,
11220 ctxt->input->cur, base);
11221 else if (ctxt->sax->characters != NULL)
11222 ctxt->sax->characters(ctxt->userData,
11223 ctxt->input->cur, base);
11224 }
11225 SKIPL(base + 3);
11226 ctxt->checkIndex = 0;
11227 ctxt->instate = XML_PARSER_CONTENT;
11228 #ifdef DEBUG_PUSH
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: entering CONTENT\n");
11231 #endif
11232 }
11233 break;
11234 }
11235 case XML_PARSER_MISC:
11236 SKIP_BLANKS;
11237 if (ctxt->input->buf == NULL)
11238 avail = ctxt->input->length -
11239 (ctxt->input->cur - ctxt->input->base);
11240 else
11241 avail = ctxt->input->buf->buffer->use -
11242 (ctxt->input->cur - ctxt->input->base);
11243 if (avail < 2)
11244 goto done;
11245 cur = ctxt->input->cur[0];
11246 next = ctxt->input->cur[1];
11247 if ((cur == '<') && (next == '?')) {
11248 if ((!terminate) &&
11249 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11250 goto done;
11251 #ifdef DEBUG_PUSH
11252 xmlGenericError(xmlGenericErrorContext,
11253 "PP: Parsing PI\n");
11254 #endif
11255 xmlParsePI(ctxt);
11256 ctxt->checkIndex = 0;
11257 } else if ((cur == '<') && (next == '!') &&
11258 (ctxt->input->cur[2] == '-') &&
11259 (ctxt->input->cur[3] == '-')) {
11260 if ((!terminate) &&
11261 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11262 goto done;
11263 #ifdef DEBUG_PUSH
11264 xmlGenericError(xmlGenericErrorContext,
11265 "PP: Parsing Comment\n");
11266 #endif
11267 xmlParseComment(ctxt);
11268 ctxt->instate = XML_PARSER_MISC;
11269 ctxt->checkIndex = 0;
11270 } else if ((cur == '<') && (next == '!') &&
11271 (ctxt->input->cur[2] == 'D') &&
11272 (ctxt->input->cur[3] == 'O') &&
11273 (ctxt->input->cur[4] == 'C') &&
11274 (ctxt->input->cur[5] == 'T') &&
11275 (ctxt->input->cur[6] == 'Y') &&
11276 (ctxt->input->cur[7] == 'P') &&
11277 (ctxt->input->cur[8] == 'E')) {
11278 if ((!terminate) &&
11279 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11280 goto done;
11281 #ifdef DEBUG_PUSH
11282 xmlGenericError(xmlGenericErrorContext,
11283 "PP: Parsing internal subset\n");
11284 #endif
11285 ctxt->inSubset = 1;
11286 xmlParseDocTypeDecl(ctxt);
11287 if (RAW == '[') {
11288 ctxt->instate = XML_PARSER_DTD;
11289 #ifdef DEBUG_PUSH
11290 xmlGenericError(xmlGenericErrorContext,
11291 "PP: entering DTD\n");
11292 #endif
11293 } else {
11294 /*
11295 * Create and update the external subset.
11296 */
11297 ctxt->inSubset = 2;
11298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11299 (ctxt->sax->externalSubset != NULL))
11300 ctxt->sax->externalSubset(ctxt->userData,
11301 ctxt->intSubName, ctxt->extSubSystem,
11302 ctxt->extSubURI);
11303 ctxt->inSubset = 0;
11304 xmlCleanSpecialAttr(ctxt);
11305 ctxt->instate = XML_PARSER_PROLOG;
11306 #ifdef DEBUG_PUSH
11307 xmlGenericError(xmlGenericErrorContext,
11308 "PP: entering PROLOG\n");
11309 #endif
11310 }
11311 } else if ((cur == '<') && (next == '!') &&
11312 (avail < 9)) {
11313 goto done;
11314 } else {
11315 ctxt->instate = XML_PARSER_START_TAG;
11316 ctxt->progressive = 1;
11317 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11318 #ifdef DEBUG_PUSH
11319 xmlGenericError(xmlGenericErrorContext,
11320 "PP: entering START_TAG\n");
11321 #endif
11322 }
11323 break;
11324 case XML_PARSER_PROLOG:
11325 SKIP_BLANKS;
11326 if (ctxt->input->buf == NULL)
11327 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11328 else
11329 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11330 if (avail < 2)
11331 goto done;
11332 cur = ctxt->input->cur[0];
11333 next = ctxt->input->cur[1];
11334 if ((cur == '<') && (next == '?')) {
11335 if ((!terminate) &&
11336 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11337 goto done;
11338 #ifdef DEBUG_PUSH
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: Parsing PI\n");
11341 #endif
11342 xmlParsePI(ctxt);
11343 } else if ((cur == '<') && (next == '!') &&
11344 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11345 if ((!terminate) &&
11346 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11347 goto done;
11348 #ifdef DEBUG_PUSH
11349 xmlGenericError(xmlGenericErrorContext,
11350 "PP: Parsing Comment\n");
11351 #endif
11352 xmlParseComment(ctxt);
11353 ctxt->instate = XML_PARSER_PROLOG;
11354 } else if ((cur == '<') && (next == '!') &&
11355 (avail < 4)) {
11356 goto done;
11357 } else {
11358 ctxt->instate = XML_PARSER_START_TAG;
11359 if (ctxt->progressive == 0)
11360 ctxt->progressive = 1;
11361 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11362 #ifdef DEBUG_PUSH
11363 xmlGenericError(xmlGenericErrorContext,
11364 "PP: entering START_TAG\n");
11365 #endif
11366 }
11367 break;
11368 case XML_PARSER_EPILOG:
11369 SKIP_BLANKS;
11370 if (ctxt->input->buf == NULL)
11371 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11372 else
11373 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11374 if (avail < 2)
11375 goto done;
11376 cur = ctxt->input->cur[0];
11377 next = ctxt->input->cur[1];
11378 if ((cur == '<') && (next == '?')) {
11379 if ((!terminate) &&
11380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 goto done;
11382 #ifdef DEBUG_PUSH
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: Parsing PI\n");
11385 #endif
11386 xmlParsePI(ctxt);
11387 ctxt->instate = XML_PARSER_EPILOG;
11388 } else if ((cur == '<') && (next == '!') &&
11389 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11390 if ((!terminate) &&
11391 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11392 goto done;
11393 #ifdef DEBUG_PUSH
11394 xmlGenericError(xmlGenericErrorContext,
11395 "PP: Parsing Comment\n");
11396 #endif
11397 xmlParseComment(ctxt);
11398 ctxt->instate = XML_PARSER_EPILOG;
11399 } else if ((cur == '<') && (next == '!') &&
11400 (avail < 4)) {
11401 goto done;
11402 } else {
11403 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11404 ctxt->instate = XML_PARSER_EOF;
11405 #ifdef DEBUG_PUSH
11406 xmlGenericError(xmlGenericErrorContext,
11407 "PP: entering EOF\n");
11408 #endif
11409 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11410 ctxt->sax->endDocument(ctxt->userData);
11411 goto done;
11412 }
11413 break;
11414 case XML_PARSER_DTD: {
11415 /*
11416 * Sorry but progressive parsing of the internal subset
11417 * is not expected to be supported. We first check that
11418 * the full content of the internal subset is available and
11419 * the parsing is launched only at that point.
11420 * Internal subset ends up with "']' S? '>'" in an unescaped
11421 * section and not in a ']]>' sequence which are conditional
11422 * sections (whoever argued to keep that crap in XML deserve
11423 * a place in hell !).
11424 */
11425 int base, i;
11426 xmlChar *buf;
11427 xmlChar quote = 0;
11428
11429 base = ctxt->input->cur - ctxt->input->base;
11430 if (base < 0) return(0);
11431 if (ctxt->checkIndex > base)
11432 base = ctxt->checkIndex;
11433 buf = ctxt->input->buf->buffer->content;
11434 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11435 base++) {
11436 if (quote != 0) {
11437 if (buf[base] == quote)
11438 quote = 0;
11439 continue;
11440 }
11441 if ((quote == 0) && (buf[base] == '<')) {
11442 int found = 0;
11443 /* special handling of comments */
11444 if (((unsigned int) base + 4 <
11445 ctxt->input->buf->buffer->use) &&
11446 (buf[base + 1] == '!') &&
11447 (buf[base + 2] == '-') &&
11448 (buf[base + 3] == '-')) {
11449 for (;(unsigned int) base + 3 <
11450 ctxt->input->buf->buffer->use; base++) {
11451 if ((buf[base] == '-') &&
11452 (buf[base + 1] == '-') &&
11453 (buf[base + 2] == '>')) {
11454 found = 1;
11455 base += 2;
11456 break;
11457 }
11458 }
11459 if (!found) {
11460 #if 0
11461 fprintf(stderr, "unfinished comment\n");
11462 #endif
11463 break; /* for */
11464 }
11465 continue;
11466 }
11467 }
11468 if (buf[base] == '"') {
11469 quote = '"';
11470 continue;
11471 }
11472 if (buf[base] == '\'') {
11473 quote = '\'';
11474 continue;
11475 }
11476 if (buf[base] == ']') {
11477 #if 0
11478 fprintf(stderr, "%c%c%c%c: ", buf[base],
11479 buf[base + 1], buf[base + 2], buf[base + 3]);
11480 #endif
11481 if ((unsigned int) base +1 >=
11482 ctxt->input->buf->buffer->use)
11483 break;
11484 if (buf[base + 1] == ']') {
11485 /* conditional crap, skip both ']' ! */
11486 base++;
11487 continue;
11488 }
11489 for (i = 1;
11490 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11491 i++) {
11492 if (buf[base + i] == '>') {
11493 #if 0
11494 fprintf(stderr, "found\n");
11495 #endif
11496 goto found_end_int_subset;
11497 }
11498 if (!IS_BLANK_CH(buf[base + i])) {
11499 #if 0
11500 fprintf(stderr, "not found\n");
11501 #endif
11502 goto not_end_of_int_subset;
11503 }
11504 }
11505 #if 0
11506 fprintf(stderr, "end of stream\n");
11507 #endif
11508 break;
11509
11510 }
11511 not_end_of_int_subset:
11512 continue; /* for */
11513 }
11514 /*
11515 * We didn't found the end of the Internal subset
11516 */
11517 #ifdef DEBUG_PUSH
11518 if (next == 0)
11519 xmlGenericError(xmlGenericErrorContext,
11520 "PP: lookup of int subset end filed\n");
11521 #endif
11522 goto done;
11523
11524 found_end_int_subset:
11525 xmlParseInternalSubset(ctxt);
11526 ctxt->inSubset = 2;
11527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11528 (ctxt->sax->externalSubset != NULL))
11529 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11530 ctxt->extSubSystem, ctxt->extSubURI);
11531 ctxt->inSubset = 0;
11532 xmlCleanSpecialAttr(ctxt);
11533 ctxt->instate = XML_PARSER_PROLOG;
11534 ctxt->checkIndex = 0;
11535 #ifdef DEBUG_PUSH
11536 xmlGenericError(xmlGenericErrorContext,
11537 "PP: entering PROLOG\n");
11538 #endif
11539 break;
11540 }
11541 case XML_PARSER_COMMENT:
11542 xmlGenericError(xmlGenericErrorContext,
11543 "PP: internal error, state == COMMENT\n");
11544 ctxt->instate = XML_PARSER_CONTENT;
11545 #ifdef DEBUG_PUSH
11546 xmlGenericError(xmlGenericErrorContext,
11547 "PP: entering CONTENT\n");
11548 #endif
11549 break;
11550 case XML_PARSER_IGNORE:
11551 xmlGenericError(xmlGenericErrorContext,
11552 "PP: internal error, state == IGNORE");
11553 ctxt->instate = XML_PARSER_DTD;
11554 #ifdef DEBUG_PUSH
11555 xmlGenericError(xmlGenericErrorContext,
11556 "PP: entering DTD\n");
11557 #endif
11558 break;
11559 case XML_PARSER_PI:
11560 xmlGenericError(xmlGenericErrorContext,
11561 "PP: internal error, state == PI\n");
11562 ctxt->instate = XML_PARSER_CONTENT;
11563 #ifdef DEBUG_PUSH
11564 xmlGenericError(xmlGenericErrorContext,
11565 "PP: entering CONTENT\n");
11566 #endif
11567 break;
11568 case XML_PARSER_ENTITY_DECL:
11569 xmlGenericError(xmlGenericErrorContext,
11570 "PP: internal error, state == ENTITY_DECL\n");
11571 ctxt->instate = XML_PARSER_DTD;
11572 #ifdef DEBUG_PUSH
11573 xmlGenericError(xmlGenericErrorContext,
11574 "PP: entering DTD\n");
11575 #endif
11576 break;
11577 case XML_PARSER_ENTITY_VALUE:
11578 xmlGenericError(xmlGenericErrorContext,
11579 "PP: internal error, state == ENTITY_VALUE\n");
11580 ctxt->instate = XML_PARSER_CONTENT;
11581 #ifdef DEBUG_PUSH
11582 xmlGenericError(xmlGenericErrorContext,
11583 "PP: entering DTD\n");
11584 #endif
11585 break;
11586 case XML_PARSER_ATTRIBUTE_VALUE:
11587 xmlGenericError(xmlGenericErrorContext,
11588 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11589 ctxt->instate = XML_PARSER_START_TAG;
11590 #ifdef DEBUG_PUSH
11591 xmlGenericError(xmlGenericErrorContext,
11592 "PP: entering START_TAG\n");
11593 #endif
11594 break;
11595 case XML_PARSER_SYSTEM_LITERAL:
11596 xmlGenericError(xmlGenericErrorContext,
11597 "PP: internal error, state == SYSTEM_LITERAL\n");
11598 ctxt->instate = XML_PARSER_START_TAG;
11599 #ifdef DEBUG_PUSH
11600 xmlGenericError(xmlGenericErrorContext,
11601 "PP: entering START_TAG\n");
11602 #endif
11603 break;
11604 case XML_PARSER_PUBLIC_LITERAL:
11605 xmlGenericError(xmlGenericErrorContext,
11606 "PP: internal error, state == PUBLIC_LITERAL\n");
11607 ctxt->instate = XML_PARSER_START_TAG;
11608 #ifdef DEBUG_PUSH
11609 xmlGenericError(xmlGenericErrorContext,
11610 "PP: entering START_TAG\n");
11611 #endif
11612 break;
11613 }
11614 }
11615 done:
11616 #ifdef DEBUG_PUSH
11617 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11618 #endif
11619 return(ret);
11620 encoding_error:
11621 {
11622 char buffer[150];
11623
11624 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11625 ctxt->input->cur[0], ctxt->input->cur[1],
11626 ctxt->input->cur[2], ctxt->input->cur[3]);
11627 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11628 "Input is not proper UTF-8, indicate encoding !\n%s",
11629 BAD_CAST buffer, NULL);
11630 }
11631 return(0);
11632 }
11633
11634 /**
11635 * xmlParseChunk:
11636 * @ctxt: an XML parser context
11637 * @chunk: an char array
11638 * @size: the size in byte of the chunk
11639 * @terminate: last chunk indicator
11640 *
11641 * Parse a Chunk of memory
11642 *
11643 * Returns zero if no error, the xmlParserErrors otherwise.
11644 */
11645 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11646 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11647 int terminate) {
11648 int end_in_lf = 0;
11649 int remain = 0;
11650
11651 if (ctxt == NULL)
11652 return(XML_ERR_INTERNAL_ERROR);
11653 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11654 return(ctxt->errNo);
11655 if (ctxt->instate == XML_PARSER_START)
11656 xmlDetectSAX2(ctxt);
11657 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11658 (chunk[size - 1] == '\r')) {
11659 end_in_lf = 1;
11660 size--;
11661 }
11662
11663 xmldecl_done:
11664
11665 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11666 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11667 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11668 int cur = ctxt->input->cur - ctxt->input->base;
11669 int res;
11670
11671 /*
11672 * Specific handling if we autodetected an encoding, we should not
11673 * push more than the first line ... which depend on the encoding
11674 * And only push the rest once the final encoding was detected
11675 */
11676 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11677 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11678 unsigned int len = 45;
11679
11680 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11681 BAD_CAST "UTF-16")) ||
11682 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11683 BAD_CAST "UTF16")))
11684 len = 90;
11685 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11686 BAD_CAST "UCS-4")) ||
11687 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11688 BAD_CAST "UCS4")))
11689 len = 180;
11690
11691 if (ctxt->input->buf->rawconsumed < len)
11692 len -= ctxt->input->buf->rawconsumed;
11693
11694 /*
11695 * Change size for reading the initial declaration only
11696 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11697 * will blindly copy extra bytes from memory.
11698 */
11699 if ((unsigned int) size > len) {
11700 remain = size - len;
11701 size = len;
11702 } else {
11703 remain = 0;
11704 }
11705 }
11706 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11707 if (res < 0) {
11708 ctxt->errNo = XML_PARSER_EOF;
11709 ctxt->disableSAX = 1;
11710 return (XML_PARSER_EOF);
11711 }
11712 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11713 ctxt->input->cur = ctxt->input->base + cur;
11714 ctxt->input->end =
11715 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11716 #ifdef DEBUG_PUSH
11717 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11718 #endif
11719
11720 } else if (ctxt->instate != XML_PARSER_EOF) {
11721 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11722 xmlParserInputBufferPtr in = ctxt->input->buf;
11723 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11724 (in->raw != NULL)) {
11725 int nbchars;
11726
11727 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11728 if (nbchars < 0) {
11729 /* TODO 2.6.0 */
11730 xmlGenericError(xmlGenericErrorContext,
11731 "xmlParseChunk: encoder error\n");
11732 return(XML_ERR_INVALID_ENCODING);
11733 }
11734 }
11735 }
11736 }
11737 if (remain != 0)
11738 xmlParseTryOrFinish(ctxt, 0);
11739 else
11740 xmlParseTryOrFinish(ctxt, terminate);
11741 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11742 return(ctxt->errNo);
11743
11744 if (remain != 0) {
11745 chunk += size;
11746 size = remain;
11747 remain = 0;
11748 goto xmldecl_done;
11749 }
11750 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11751 (ctxt->input->buf != NULL)) {
11752 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11753 }
11754 if (terminate) {
11755 /*
11756 * Check for termination
11757 */
11758 int avail = 0;
11759
11760 if (ctxt->input != NULL) {
11761 if (ctxt->input->buf == NULL)
11762 avail = ctxt->input->length -
11763 (ctxt->input->cur - ctxt->input->base);
11764 else
11765 avail = ctxt->input->buf->buffer->use -
11766 (ctxt->input->cur - ctxt->input->base);
11767 }
11768
11769 if ((ctxt->instate != XML_PARSER_EOF) &&
11770 (ctxt->instate != XML_PARSER_EPILOG)) {
11771 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11772 }
11773 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11774 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11775 }
11776 if (ctxt->instate != XML_PARSER_EOF) {
11777 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11778 ctxt->sax->endDocument(ctxt->userData);
11779 }
11780 ctxt->instate = XML_PARSER_EOF;
11781 }
11782 return((xmlParserErrors) ctxt->errNo);
11783 }
11784
11785 /************************************************************************
11786 * *
11787 * I/O front end functions to the parser *
11788 * *
11789 ************************************************************************/
11790
11791 /**
11792 * xmlCreatePushParserCtxt:
11793 * @sax: a SAX handler
11794 * @user_data: The user data returned on SAX callbacks
11795 * @chunk: a pointer to an array of chars
11796 * @size: number of chars in the array
11797 * @filename: an optional file name or URI
11798 *
11799 * Create a parser context for using the XML parser in push mode.
11800 * If @buffer and @size are non-NULL, the data is used to detect
11801 * the encoding. The remaining characters will be parsed so they
11802 * don't need to be fed in again through xmlParseChunk.
11803 * To allow content encoding detection, @size should be >= 4
11804 * The value of @filename is used for fetching external entities
11805 * and error/warning reports.
11806 *
11807 * Returns the new parser context or NULL
11808 */
11809
11810 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11811 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11812 const char *chunk, int size, const char *filename) {
11813 xmlParserCtxtPtr ctxt;
11814 xmlParserInputPtr inputStream;
11815 xmlParserInputBufferPtr buf;
11816 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11817
11818 /*
11819 * plug some encoding conversion routines
11820 */
11821 if ((chunk != NULL) && (size >= 4))
11822 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11823
11824 buf = xmlAllocParserInputBuffer(enc);
11825 if (buf == NULL) return(NULL);
11826
11827 ctxt = xmlNewParserCtxt();
11828 if (ctxt == NULL) {
11829 xmlErrMemory(NULL, "creating parser: out of memory\n");
11830 xmlFreeParserInputBuffer(buf);
11831 return(NULL);
11832 }
11833 ctxt->dictNames = 1;
11834 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11835 if (ctxt->pushTab == NULL) {
11836 xmlErrMemory(ctxt, NULL);
11837 xmlFreeParserInputBuffer(buf);
11838 xmlFreeParserCtxt(ctxt);
11839 return(NULL);
11840 }
11841 if (sax != NULL) {
11842 #ifdef LIBXML_SAX1_ENABLED
11843 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11844 #endif /* LIBXML_SAX1_ENABLED */
11845 xmlFree(ctxt->sax);
11846 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11847 if (ctxt->sax == NULL) {
11848 xmlErrMemory(ctxt, NULL);
11849 xmlFreeParserInputBuffer(buf);
11850 xmlFreeParserCtxt(ctxt);
11851 return(NULL);
11852 }
11853 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11854 if (sax->initialized == XML_SAX2_MAGIC)
11855 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11856 else
11857 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11858 if (user_data != NULL)
11859 ctxt->userData = user_data;
11860 }
11861 if (filename == NULL) {
11862 ctxt->directory = NULL;
11863 } else {
11864 ctxt->directory = xmlParserGetDirectory(filename);
11865 }
11866
11867 inputStream = xmlNewInputStream(ctxt);
11868 if (inputStream == NULL) {
11869 xmlFreeParserCtxt(ctxt);
11870 xmlFreeParserInputBuffer(buf);
11871 return(NULL);
11872 }
11873
11874 if (filename == NULL)
11875 inputStream->filename = NULL;
11876 else {
11877 inputStream->filename = (char *)
11878 xmlCanonicPath((const xmlChar *) filename);
11879 if (inputStream->filename == NULL) {
11880 xmlFreeParserCtxt(ctxt);
11881 xmlFreeParserInputBuffer(buf);
11882 return(NULL);
11883 }
11884 }
11885 inputStream->buf = buf;
11886 inputStream->base = inputStream->buf->buffer->content;
11887 inputStream->cur = inputStream->buf->buffer->content;
11888 inputStream->end =
11889 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11890
11891 inputPush(ctxt, inputStream);
11892
11893 /*
11894 * If the caller didn't provide an initial 'chunk' for determining
11895 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11896 * that it can be automatically determined later
11897 */
11898 if ((size == 0) || (chunk == NULL)) {
11899 ctxt->charset = XML_CHAR_ENCODING_NONE;
11900 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11901 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11902 int cur = ctxt->input->cur - ctxt->input->base;
11903
11904 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11905
11906 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11907 ctxt->input->cur = ctxt->input->base + cur;
11908 ctxt->input->end =
11909 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11910 #ifdef DEBUG_PUSH
11911 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11912 #endif
11913 }
11914
11915 if (enc != XML_CHAR_ENCODING_NONE) {
11916 xmlSwitchEncoding(ctxt, enc);
11917 }
11918
11919 return(ctxt);
11920 }
11921 #endif /* LIBXML_PUSH_ENABLED */
11922
11923 /**
11924 * xmlStopParser:
11925 * @ctxt: an XML parser context
11926 *
11927 * Blocks further parser processing
11928 */
11929 void
xmlStopParser(xmlParserCtxtPtr ctxt)11930 xmlStopParser(xmlParserCtxtPtr ctxt) {
11931 if (ctxt == NULL)
11932 return;
11933 ctxt->instate = XML_PARSER_EOF;
11934 ctxt->disableSAX = 1;
11935 if (ctxt->input != NULL) {
11936 ctxt->input->cur = BAD_CAST"";
11937 ctxt->input->base = ctxt->input->cur;
11938 }
11939 }
11940
11941 /**
11942 * xmlCreateIOParserCtxt:
11943 * @sax: a SAX handler
11944 * @user_data: The user data returned on SAX callbacks
11945 * @ioread: an I/O read function
11946 * @ioclose: an I/O close function
11947 * @ioctx: an I/O handler
11948 * @enc: the charset encoding if known
11949 *
11950 * Create a parser context for using the XML parser with an existing
11951 * I/O stream
11952 *
11953 * Returns the new parser context or NULL
11954 */
11955 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11956 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11957 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11958 void *ioctx, xmlCharEncoding enc) {
11959 xmlParserCtxtPtr ctxt;
11960 xmlParserInputPtr inputStream;
11961 xmlParserInputBufferPtr buf;
11962
11963 if (ioread == NULL) return(NULL);
11964
11965 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11966 if (buf == NULL) return(NULL);
11967
11968 ctxt = xmlNewParserCtxt();
11969 if (ctxt == NULL) {
11970 xmlFreeParserInputBuffer(buf);
11971 return(NULL);
11972 }
11973 if (sax != NULL) {
11974 #ifdef LIBXML_SAX1_ENABLED
11975 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11976 #endif /* LIBXML_SAX1_ENABLED */
11977 xmlFree(ctxt->sax);
11978 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11979 if (ctxt->sax == NULL) {
11980 xmlErrMemory(ctxt, NULL);
11981 xmlFreeParserCtxt(ctxt);
11982 return(NULL);
11983 }
11984 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11985 if (sax->initialized == XML_SAX2_MAGIC)
11986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11987 else
11988 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11989 if (user_data != NULL)
11990 ctxt->userData = user_data;
11991 }
11992
11993 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11994 if (inputStream == NULL) {
11995 xmlFreeParserCtxt(ctxt);
11996 return(NULL);
11997 }
11998 inputPush(ctxt, inputStream);
11999
12000 return(ctxt);
12001 }
12002
12003 #ifdef LIBXML_VALID_ENABLED
12004 /************************************************************************
12005 * *
12006 * Front ends when parsing a DTD *
12007 * *
12008 ************************************************************************/
12009
12010 /**
12011 * xmlIOParseDTD:
12012 * @sax: the SAX handler block or NULL
12013 * @input: an Input Buffer
12014 * @enc: the charset encoding if known
12015 *
12016 * Load and parse a DTD
12017 *
12018 * Returns the resulting xmlDtdPtr or NULL in case of error.
12019 * @input will be freed by the function in any case.
12020 */
12021
12022 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12023 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12024 xmlCharEncoding enc) {
12025 xmlDtdPtr ret = NULL;
12026 xmlParserCtxtPtr ctxt;
12027 xmlParserInputPtr pinput = NULL;
12028 xmlChar start[4];
12029
12030 if (input == NULL)
12031 return(NULL);
12032
12033 ctxt = xmlNewParserCtxt();
12034 if (ctxt == NULL) {
12035 xmlFreeParserInputBuffer(input);
12036 return(NULL);
12037 }
12038
12039 /*
12040 * Set-up the SAX context
12041 */
12042 if (sax != NULL) {
12043 if (ctxt->sax != NULL)
12044 xmlFree(ctxt->sax);
12045 ctxt->sax = sax;
12046 ctxt->userData = ctxt;
12047 }
12048 xmlDetectSAX2(ctxt);
12049
12050 /*
12051 * generate a parser input from the I/O handler
12052 */
12053
12054 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12055 if (pinput == NULL) {
12056 if (sax != NULL) ctxt->sax = NULL;
12057 xmlFreeParserInputBuffer(input);
12058 xmlFreeParserCtxt(ctxt);
12059 return(NULL);
12060 }
12061
12062 /*
12063 * plug some encoding conversion routines here.
12064 */
12065 if (xmlPushInput(ctxt, pinput) < 0) {
12066 if (sax != NULL) ctxt->sax = NULL;
12067 xmlFreeParserCtxt(ctxt);
12068 return(NULL);
12069 }
12070 if (enc != XML_CHAR_ENCODING_NONE) {
12071 xmlSwitchEncoding(ctxt, enc);
12072 }
12073
12074 pinput->filename = NULL;
12075 pinput->line = 1;
12076 pinput->col = 1;
12077 pinput->base = ctxt->input->cur;
12078 pinput->cur = ctxt->input->cur;
12079 pinput->free = NULL;
12080
12081 /*
12082 * let's parse that entity knowing it's an external subset.
12083 */
12084 ctxt->inSubset = 2;
12085 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12086 if (ctxt->myDoc == NULL) {
12087 xmlErrMemory(ctxt, "New Doc failed");
12088 return(NULL);
12089 }
12090 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12091 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12092 BAD_CAST "none", BAD_CAST "none");
12093
12094 if ((enc == XML_CHAR_ENCODING_NONE) &&
12095 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12096 /*
12097 * Get the 4 first bytes and decode the charset
12098 * if enc != XML_CHAR_ENCODING_NONE
12099 * plug some encoding conversion routines.
12100 */
12101 start[0] = RAW;
12102 start[1] = NXT(1);
12103 start[2] = NXT(2);
12104 start[3] = NXT(3);
12105 enc = xmlDetectCharEncoding(start, 4);
12106 if (enc != XML_CHAR_ENCODING_NONE) {
12107 xmlSwitchEncoding(ctxt, enc);
12108 }
12109 }
12110
12111 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12112
12113 if (ctxt->myDoc != NULL) {
12114 if (ctxt->wellFormed) {
12115 ret = ctxt->myDoc->extSubset;
12116 ctxt->myDoc->extSubset = NULL;
12117 if (ret != NULL) {
12118 xmlNodePtr tmp;
12119
12120 ret->doc = NULL;
12121 tmp = ret->children;
12122 while (tmp != NULL) {
12123 tmp->doc = NULL;
12124 tmp = tmp->next;
12125 }
12126 }
12127 } else {
12128 ret = NULL;
12129 }
12130 xmlFreeDoc(ctxt->myDoc);
12131 ctxt->myDoc = NULL;
12132 }
12133 if (sax != NULL) ctxt->sax = NULL;
12134 xmlFreeParserCtxt(ctxt);
12135
12136 return(ret);
12137 }
12138
12139 /**
12140 * xmlSAXParseDTD:
12141 * @sax: the SAX handler block
12142 * @ExternalID: a NAME* containing the External ID of the DTD
12143 * @SystemID: a NAME* containing the URL to the DTD
12144 *
12145 * Load and parse an external subset.
12146 *
12147 * Returns the resulting xmlDtdPtr or NULL in case of error.
12148 */
12149
12150 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12151 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12152 const xmlChar *SystemID) {
12153 xmlDtdPtr ret = NULL;
12154 xmlParserCtxtPtr ctxt;
12155 xmlParserInputPtr input = NULL;
12156 xmlCharEncoding enc;
12157 xmlChar* systemIdCanonic;
12158
12159 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12160
12161 ctxt = xmlNewParserCtxt();
12162 if (ctxt == NULL) {
12163 return(NULL);
12164 }
12165
12166 /*
12167 * Set-up the SAX context
12168 */
12169 if (sax != NULL) {
12170 if (ctxt->sax != NULL)
12171 xmlFree(ctxt->sax);
12172 ctxt->sax = sax;
12173 ctxt->userData = ctxt;
12174 }
12175
12176 /*
12177 * Canonicalise the system ID
12178 */
12179 systemIdCanonic = xmlCanonicPath(SystemID);
12180 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12181 xmlFreeParserCtxt(ctxt);
12182 return(NULL);
12183 }
12184
12185 /*
12186 * Ask the Entity resolver to load the damn thing
12187 */
12188
12189 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12190 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12191 systemIdCanonic);
12192 if (input == NULL) {
12193 if (sax != NULL) ctxt->sax = NULL;
12194 xmlFreeParserCtxt(ctxt);
12195 if (systemIdCanonic != NULL)
12196 xmlFree(systemIdCanonic);
12197 return(NULL);
12198 }
12199
12200 /*
12201 * plug some encoding conversion routines here.
12202 */
12203 if (xmlPushInput(ctxt, input) < 0) {
12204 if (sax != NULL) ctxt->sax = NULL;
12205 xmlFreeParserCtxt(ctxt);
12206 if (systemIdCanonic != NULL)
12207 xmlFree(systemIdCanonic);
12208 return(NULL);
12209 }
12210 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12211 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12212 xmlSwitchEncoding(ctxt, enc);
12213 }
12214
12215 if (input->filename == NULL)
12216 input->filename = (char *) systemIdCanonic;
12217 else
12218 xmlFree(systemIdCanonic);
12219 input->line = 1;
12220 input->col = 1;
12221 input->base = ctxt->input->cur;
12222 input->cur = ctxt->input->cur;
12223 input->free = NULL;
12224
12225 /*
12226 * let's parse that entity knowing it's an external subset.
12227 */
12228 ctxt->inSubset = 2;
12229 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12230 if (ctxt->myDoc == NULL) {
12231 xmlErrMemory(ctxt, "New Doc failed");
12232 if (sax != NULL) ctxt->sax = NULL;
12233 xmlFreeParserCtxt(ctxt);
12234 return(NULL);
12235 }
12236 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12237 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12238 ExternalID, SystemID);
12239 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12240
12241 if (ctxt->myDoc != NULL) {
12242 if (ctxt->wellFormed) {
12243 ret = ctxt->myDoc->extSubset;
12244 ctxt->myDoc->extSubset = NULL;
12245 if (ret != NULL) {
12246 xmlNodePtr tmp;
12247
12248 ret->doc = NULL;
12249 tmp = ret->children;
12250 while (tmp != NULL) {
12251 tmp->doc = NULL;
12252 tmp = tmp->next;
12253 }
12254 }
12255 } else {
12256 ret = NULL;
12257 }
12258 xmlFreeDoc(ctxt->myDoc);
12259 ctxt->myDoc = NULL;
12260 }
12261 if (sax != NULL) ctxt->sax = NULL;
12262 xmlFreeParserCtxt(ctxt);
12263
12264 return(ret);
12265 }
12266
12267
12268 /**
12269 * xmlParseDTD:
12270 * @ExternalID: a NAME* containing the External ID of the DTD
12271 * @SystemID: a NAME* containing the URL to the DTD
12272 *
12273 * Load and parse an external subset.
12274 *
12275 * Returns the resulting xmlDtdPtr or NULL in case of error.
12276 */
12277
12278 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12279 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12280 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12281 }
12282 #endif /* LIBXML_VALID_ENABLED */
12283
12284 /************************************************************************
12285 * *
12286 * Front ends when parsing an Entity *
12287 * *
12288 ************************************************************************/
12289
12290 /**
12291 * xmlParseCtxtExternalEntity:
12292 * @ctx: the existing parsing context
12293 * @URL: the URL for the entity to load
12294 * @ID: the System ID for the entity to load
12295 * @lst: the return value for the set of parsed nodes
12296 *
12297 * Parse an external general entity within an existing parsing context
12298 * An external general parsed entity is well-formed if it matches the
12299 * production labeled extParsedEnt.
12300 *
12301 * [78] extParsedEnt ::= TextDecl? content
12302 *
12303 * Returns 0 if the entity is well formed, -1 in case of args problem and
12304 * the parser error code otherwise
12305 */
12306
12307 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12308 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12309 const xmlChar *ID, xmlNodePtr *lst) {
12310 xmlParserCtxtPtr ctxt;
12311 xmlDocPtr newDoc;
12312 xmlNodePtr newRoot;
12313 xmlSAXHandlerPtr oldsax = NULL;
12314 int ret = 0;
12315 xmlChar start[4];
12316 xmlCharEncoding enc;
12317
12318 if (ctx == NULL) return(-1);
12319
12320 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12321 (ctx->depth > 1024)) {
12322 return(XML_ERR_ENTITY_LOOP);
12323 }
12324
12325 if (lst != NULL)
12326 *lst = NULL;
12327 if ((URL == NULL) && (ID == NULL))
12328 return(-1);
12329 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12330 return(-1);
12331
12332 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12333 if (ctxt == NULL) {
12334 return(-1);
12335 }
12336
12337 oldsax = ctxt->sax;
12338 ctxt->sax = ctx->sax;
12339 xmlDetectSAX2(ctxt);
12340 newDoc = xmlNewDoc(BAD_CAST "1.0");
12341 if (newDoc == NULL) {
12342 xmlFreeParserCtxt(ctxt);
12343 return(-1);
12344 }
12345 newDoc->properties = XML_DOC_INTERNAL;
12346 if (ctx->myDoc->dict) {
12347 newDoc->dict = ctx->myDoc->dict;
12348 xmlDictReference(newDoc->dict);
12349 }
12350 if (ctx->myDoc != NULL) {
12351 newDoc->intSubset = ctx->myDoc->intSubset;
12352 newDoc->extSubset = ctx->myDoc->extSubset;
12353 }
12354 if (ctx->myDoc->URL != NULL) {
12355 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12356 }
12357 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12358 if (newRoot == NULL) {
12359 ctxt->sax = oldsax;
12360 xmlFreeParserCtxt(ctxt);
12361 newDoc->intSubset = NULL;
12362 newDoc->extSubset = NULL;
12363 xmlFreeDoc(newDoc);
12364 return(-1);
12365 }
12366 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12367 nodePush(ctxt, newDoc->children);
12368 if (ctx->myDoc == NULL) {
12369 ctxt->myDoc = newDoc;
12370 } else {
12371 ctxt->myDoc = ctx->myDoc;
12372 newDoc->children->doc = ctx->myDoc;
12373 }
12374
12375 /*
12376 * Get the 4 first bytes and decode the charset
12377 * if enc != XML_CHAR_ENCODING_NONE
12378 * plug some encoding conversion routines.
12379 */
12380 GROW
12381 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12382 start[0] = RAW;
12383 start[1] = NXT(1);
12384 start[2] = NXT(2);
12385 start[3] = NXT(3);
12386 enc = xmlDetectCharEncoding(start, 4);
12387 if (enc != XML_CHAR_ENCODING_NONE) {
12388 xmlSwitchEncoding(ctxt, enc);
12389 }
12390 }
12391
12392 /*
12393 * Parse a possible text declaration first
12394 */
12395 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12396 xmlParseTextDecl(ctxt);
12397 /*
12398 * An XML-1.0 document can't reference an entity not XML-1.0
12399 */
12400 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12401 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12402 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12403 "Version mismatch between document and entity\n");
12404 }
12405 }
12406
12407 /*
12408 * Doing validity checking on chunk doesn't make sense
12409 */
12410 ctxt->instate = XML_PARSER_CONTENT;
12411 ctxt->validate = ctx->validate;
12412 ctxt->valid = ctx->valid;
12413 ctxt->loadsubset = ctx->loadsubset;
12414 ctxt->depth = ctx->depth + 1;
12415 ctxt->replaceEntities = ctx->replaceEntities;
12416 if (ctxt->validate) {
12417 ctxt->vctxt.error = ctx->vctxt.error;
12418 ctxt->vctxt.warning = ctx->vctxt.warning;
12419 } else {
12420 ctxt->vctxt.error = NULL;
12421 ctxt->vctxt.warning = NULL;
12422 }
12423 ctxt->vctxt.nodeTab = NULL;
12424 ctxt->vctxt.nodeNr = 0;
12425 ctxt->vctxt.nodeMax = 0;
12426 ctxt->vctxt.node = NULL;
12427 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12428 ctxt->dict = ctx->dict;
12429 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12430 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12431 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12432 ctxt->dictNames = ctx->dictNames;
12433 ctxt->attsDefault = ctx->attsDefault;
12434 ctxt->attsSpecial = ctx->attsSpecial;
12435 ctxt->linenumbers = ctx->linenumbers;
12436
12437 xmlParseContent(ctxt);
12438
12439 ctx->validate = ctxt->validate;
12440 ctx->valid = ctxt->valid;
12441 if ((RAW == '<') && (NXT(1) == '/')) {
12442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12443 } else if (RAW != 0) {
12444 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12445 }
12446 if (ctxt->node != newDoc->children) {
12447 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12448 }
12449
12450 if (!ctxt->wellFormed) {
12451 if (ctxt->errNo == 0)
12452 ret = 1;
12453 else
12454 ret = ctxt->errNo;
12455 } else {
12456 if (lst != NULL) {
12457 xmlNodePtr cur;
12458
12459 /*
12460 * Return the newly created nodeset after unlinking it from
12461 * they pseudo parent.
12462 */
12463 cur = newDoc->children->children;
12464 *lst = cur;
12465 while (cur != NULL) {
12466 cur->parent = NULL;
12467 cur = cur->next;
12468 }
12469 newDoc->children->children = NULL;
12470 }
12471 ret = 0;
12472 }
12473 ctxt->sax = oldsax;
12474 ctxt->dict = NULL;
12475 ctxt->attsDefault = NULL;
12476 ctxt->attsSpecial = NULL;
12477 xmlFreeParserCtxt(ctxt);
12478 newDoc->intSubset = NULL;
12479 newDoc->extSubset = NULL;
12480 xmlFreeDoc(newDoc);
12481
12482 return(ret);
12483 }
12484
12485 /**
12486 * xmlParseExternalEntityPrivate:
12487 * @doc: the document the chunk pertains to
12488 * @oldctxt: the previous parser context if available
12489 * @sax: the SAX handler bloc (possibly NULL)
12490 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12491 * @depth: Used for loop detection, use 0
12492 * @URL: the URL for the entity to load
12493 * @ID: the System ID for the entity to load
12494 * @list: the return value for the set of parsed nodes
12495 *
12496 * Private version of xmlParseExternalEntity()
12497 *
12498 * Returns 0 if the entity is well formed, -1 in case of args problem and
12499 * the parser error code otherwise
12500 */
12501
12502 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12503 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12504 xmlSAXHandlerPtr sax,
12505 void *user_data, int depth, const xmlChar *URL,
12506 const xmlChar *ID, xmlNodePtr *list) {
12507 xmlParserCtxtPtr ctxt;
12508 xmlDocPtr newDoc;
12509 xmlNodePtr newRoot;
12510 xmlSAXHandlerPtr oldsax = NULL;
12511 xmlParserErrors ret = XML_ERR_OK;
12512 xmlChar start[4];
12513 xmlCharEncoding enc;
12514
12515 if (((depth > 40) &&
12516 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12517 (depth > 1024)) {
12518 return(XML_ERR_ENTITY_LOOP);
12519 }
12520
12521 if (list != NULL)
12522 *list = NULL;
12523 if ((URL == NULL) && (ID == NULL))
12524 return(XML_ERR_INTERNAL_ERROR);
12525 if (doc == NULL)
12526 return(XML_ERR_INTERNAL_ERROR);
12527
12528
12529 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12530 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12531 ctxt->userData = ctxt;
12532 if (oldctxt != NULL) {
12533 ctxt->_private = oldctxt->_private;
12534 ctxt->loadsubset = oldctxt->loadsubset;
12535 ctxt->validate = oldctxt->validate;
12536 ctxt->external = oldctxt->external;
12537 ctxt->record_info = oldctxt->record_info;
12538 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12539 ctxt->node_seq.length = oldctxt->node_seq.length;
12540 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12541 } else {
12542 /*
12543 * Doing validity checking on chunk without context
12544 * doesn't make sense
12545 */
12546 ctxt->_private = NULL;
12547 ctxt->validate = 0;
12548 ctxt->external = 2;
12549 ctxt->loadsubset = 0;
12550 }
12551 if (sax != NULL) {
12552 oldsax = ctxt->sax;
12553 ctxt->sax = sax;
12554 if (user_data != NULL)
12555 ctxt->userData = user_data;
12556 }
12557 xmlDetectSAX2(ctxt);
12558 newDoc = xmlNewDoc(BAD_CAST "1.0");
12559 if (newDoc == NULL) {
12560 ctxt->node_seq.maximum = 0;
12561 ctxt->node_seq.length = 0;
12562 ctxt->node_seq.buffer = NULL;
12563 xmlFreeParserCtxt(ctxt);
12564 return(XML_ERR_INTERNAL_ERROR);
12565 }
12566 newDoc->properties = XML_DOC_INTERNAL;
12567 newDoc->intSubset = doc->intSubset;
12568 newDoc->extSubset = doc->extSubset;
12569 newDoc->dict = doc->dict;
12570 xmlDictReference(newDoc->dict);
12571
12572 if (doc->URL != NULL) {
12573 newDoc->URL = xmlStrdup(doc->URL);
12574 }
12575 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12576 if (newRoot == NULL) {
12577 if (sax != NULL)
12578 ctxt->sax = oldsax;
12579 ctxt->node_seq.maximum = 0;
12580 ctxt->node_seq.length = 0;
12581 ctxt->node_seq.buffer = NULL;
12582 xmlFreeParserCtxt(ctxt);
12583 newDoc->intSubset = NULL;
12584 newDoc->extSubset = NULL;
12585 xmlFreeDoc(newDoc);
12586 return(XML_ERR_INTERNAL_ERROR);
12587 }
12588 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12589 nodePush(ctxt, newDoc->children);
12590 ctxt->myDoc = doc;
12591 newRoot->doc = doc;
12592
12593 /*
12594 * Get the 4 first bytes and decode the charset
12595 * if enc != XML_CHAR_ENCODING_NONE
12596 * plug some encoding conversion routines.
12597 */
12598 GROW;
12599 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12600 start[0] = RAW;
12601 start[1] = NXT(1);
12602 start[2] = NXT(2);
12603 start[3] = NXT(3);
12604 enc = xmlDetectCharEncoding(start, 4);
12605 if (enc != XML_CHAR_ENCODING_NONE) {
12606 xmlSwitchEncoding(ctxt, enc);
12607 }
12608 }
12609
12610 /*
12611 * Parse a possible text declaration first
12612 */
12613 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12614 xmlParseTextDecl(ctxt);
12615 }
12616
12617 ctxt->instate = XML_PARSER_CONTENT;
12618 ctxt->depth = depth;
12619
12620 xmlParseContent(ctxt);
12621
12622 if ((RAW == '<') && (NXT(1) == '/')) {
12623 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12624 } else if (RAW != 0) {
12625 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12626 }
12627 if (ctxt->node != newDoc->children) {
12628 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12629 }
12630
12631 if (!ctxt->wellFormed) {
12632 if (ctxt->errNo == 0)
12633 ret = XML_ERR_INTERNAL_ERROR;
12634 else
12635 ret = (xmlParserErrors)ctxt->errNo;
12636 } else {
12637 if (list != NULL) {
12638 xmlNodePtr cur;
12639
12640 /*
12641 * Return the newly created nodeset after unlinking it from
12642 * they pseudo parent.
12643 */
12644 cur = newDoc->children->children;
12645 *list = cur;
12646 while (cur != NULL) {
12647 cur->parent = NULL;
12648 cur = cur->next;
12649 }
12650 newDoc->children->children = NULL;
12651 }
12652 ret = XML_ERR_OK;
12653 }
12654
12655 /*
12656 * Record in the parent context the number of entities replacement
12657 * done when parsing that reference.
12658 */
12659 if (oldctxt != NULL)
12660 oldctxt->nbentities += ctxt->nbentities;
12661
12662 /*
12663 * Also record the size of the entity parsed
12664 */
12665 if (ctxt->input != NULL) {
12666 oldctxt->sizeentities += ctxt->input->consumed;
12667 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12668 }
12669 /*
12670 * And record the last error if any
12671 */
12672 if (ctxt->lastError.code != XML_ERR_OK)
12673 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12674
12675 if (sax != NULL)
12676 ctxt->sax = oldsax;
12677 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12678 oldctxt->node_seq.length = ctxt->node_seq.length;
12679 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12680 ctxt->node_seq.maximum = 0;
12681 ctxt->node_seq.length = 0;
12682 ctxt->node_seq.buffer = NULL;
12683 xmlFreeParserCtxt(ctxt);
12684 newDoc->intSubset = NULL;
12685 newDoc->extSubset = NULL;
12686 xmlFreeDoc(newDoc);
12687
12688 return(ret);
12689 }
12690
12691 #ifdef LIBXML_SAX1_ENABLED
12692 /**
12693 * xmlParseExternalEntity:
12694 * @doc: the document the chunk pertains to
12695 * @sax: the SAX handler bloc (possibly NULL)
12696 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12697 * @depth: Used for loop detection, use 0
12698 * @URL: the URL for the entity to load
12699 * @ID: the System ID for the entity to load
12700 * @lst: the return value for the set of parsed nodes
12701 *
12702 * Parse an external general entity
12703 * An external general parsed entity is well-formed if it matches the
12704 * production labeled extParsedEnt.
12705 *
12706 * [78] extParsedEnt ::= TextDecl? content
12707 *
12708 * Returns 0 if the entity is well formed, -1 in case of args problem and
12709 * the parser error code otherwise
12710 */
12711
12712 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12713 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12714 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12715 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12716 ID, lst));
12717 }
12718
12719 /**
12720 * xmlParseBalancedChunkMemory:
12721 * @doc: the document the chunk pertains to
12722 * @sax: the SAX handler bloc (possibly NULL)
12723 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12724 * @depth: Used for loop detection, use 0
12725 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12726 * @lst: the return value for the set of parsed nodes
12727 *
12728 * Parse a well-balanced chunk of an XML document
12729 * called by the parser
12730 * The allowed sequence for the Well Balanced Chunk is the one defined by
12731 * the content production in the XML grammar:
12732 *
12733 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12734 *
12735 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12736 * the parser error code otherwise
12737 */
12738
12739 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12740 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12741 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12742 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12743 depth, string, lst, 0 );
12744 }
12745 #endif /* LIBXML_SAX1_ENABLED */
12746
12747 /**
12748 * xmlParseBalancedChunkMemoryInternal:
12749 * @oldctxt: the existing parsing context
12750 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12751 * @user_data: the user data field for the parser context
12752 * @lst: the return value for the set of parsed nodes
12753 *
12754 *
12755 * Parse a well-balanced chunk of an XML document
12756 * called by the parser
12757 * The allowed sequence for the Well Balanced Chunk is the one defined by
12758 * the content production in the XML grammar:
12759 *
12760 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12761 *
12762 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12763 * error code otherwise
12764 *
12765 * In case recover is set to 1, the nodelist will not be empty even if
12766 * the parsed chunk is not well balanced.
12767 */
12768 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12769 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12770 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12771 xmlParserCtxtPtr ctxt;
12772 xmlDocPtr newDoc = NULL;
12773 xmlNodePtr newRoot;
12774 xmlSAXHandlerPtr oldsax = NULL;
12775 xmlNodePtr content = NULL;
12776 xmlNodePtr last = NULL;
12777 int size;
12778 xmlParserErrors ret = XML_ERR_OK;
12779 #ifdef SAX2
12780 int i;
12781 #endif
12782
12783 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12784 (oldctxt->depth > 1024)) {
12785 return(XML_ERR_ENTITY_LOOP);
12786 }
12787
12788
12789 if (lst != NULL)
12790 *lst = NULL;
12791 if (string == NULL)
12792 return(XML_ERR_INTERNAL_ERROR);
12793
12794 size = xmlStrlen(string);
12795
12796 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12797 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12798 if (user_data != NULL)
12799 ctxt->userData = user_data;
12800 else
12801 ctxt->userData = ctxt;
12802 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12803 ctxt->dict = oldctxt->dict;
12804 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12805 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12806 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12807
12808 #ifdef SAX2
12809 /* propagate namespaces down the entity */
12810 for (i = 0;i < oldctxt->nsNr;i += 2) {
12811 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12812 }
12813 #endif
12814
12815 oldsax = ctxt->sax;
12816 ctxt->sax = oldctxt->sax;
12817 xmlDetectSAX2(ctxt);
12818 ctxt->replaceEntities = oldctxt->replaceEntities;
12819 ctxt->options = oldctxt->options;
12820
12821 ctxt->_private = oldctxt->_private;
12822 if (oldctxt->myDoc == NULL) {
12823 newDoc = xmlNewDoc(BAD_CAST "1.0");
12824 if (newDoc == NULL) {
12825 ctxt->sax = oldsax;
12826 ctxt->dict = NULL;
12827 xmlFreeParserCtxt(ctxt);
12828 return(XML_ERR_INTERNAL_ERROR);
12829 }
12830 newDoc->properties = XML_DOC_INTERNAL;
12831 newDoc->dict = ctxt->dict;
12832 xmlDictReference(newDoc->dict);
12833 ctxt->myDoc = newDoc;
12834 } else {
12835 ctxt->myDoc = oldctxt->myDoc;
12836 content = ctxt->myDoc->children;
12837 last = ctxt->myDoc->last;
12838 }
12839 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12840 if (newRoot == NULL) {
12841 ctxt->sax = oldsax;
12842 ctxt->dict = NULL;
12843 xmlFreeParserCtxt(ctxt);
12844 if (newDoc != NULL) {
12845 xmlFreeDoc(newDoc);
12846 }
12847 return(XML_ERR_INTERNAL_ERROR);
12848 }
12849 ctxt->myDoc->children = NULL;
12850 ctxt->myDoc->last = NULL;
12851 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12852 nodePush(ctxt, ctxt->myDoc->children);
12853 ctxt->instate = XML_PARSER_CONTENT;
12854 ctxt->depth = oldctxt->depth + 1;
12855
12856 ctxt->validate = 0;
12857 ctxt->loadsubset = oldctxt->loadsubset;
12858 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12859 /*
12860 * ID/IDREF registration will be done in xmlValidateElement below
12861 */
12862 ctxt->loadsubset |= XML_SKIP_IDS;
12863 }
12864 ctxt->dictNames = oldctxt->dictNames;
12865 ctxt->attsDefault = oldctxt->attsDefault;
12866 ctxt->attsSpecial = oldctxt->attsSpecial;
12867
12868 xmlParseContent(ctxt);
12869 if ((RAW == '<') && (NXT(1) == '/')) {
12870 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12871 } else if (RAW != 0) {
12872 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12873 }
12874 if (ctxt->node != ctxt->myDoc->children) {
12875 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12876 }
12877
12878 if (!ctxt->wellFormed) {
12879 if (ctxt->errNo == 0)
12880 ret = XML_ERR_INTERNAL_ERROR;
12881 else
12882 ret = (xmlParserErrors)ctxt->errNo;
12883 } else {
12884 ret = XML_ERR_OK;
12885 }
12886
12887 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12888 xmlNodePtr cur;
12889
12890 /*
12891 * Return the newly created nodeset after unlinking it from
12892 * they pseudo parent.
12893 */
12894 cur = ctxt->myDoc->children->children;
12895 *lst = cur;
12896 while (cur != NULL) {
12897 #ifdef LIBXML_VALID_ENABLED
12898 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12899 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12900 (cur->type == XML_ELEMENT_NODE)) {
12901 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12902 oldctxt->myDoc, cur);
12903 }
12904 #endif /* LIBXML_VALID_ENABLED */
12905 cur->parent = NULL;
12906 cur = cur->next;
12907 }
12908 ctxt->myDoc->children->children = NULL;
12909 }
12910 if (ctxt->myDoc != NULL) {
12911 xmlFreeNode(ctxt->myDoc->children);
12912 ctxt->myDoc->children = content;
12913 ctxt->myDoc->last = last;
12914 }
12915
12916 /*
12917 * Record in the parent context the number of entities replacement
12918 * done when parsing that reference.
12919 */
12920 if (oldctxt != NULL)
12921 oldctxt->nbentities += ctxt->nbentities;
12922
12923 /*
12924 * Also record the last error if any
12925 */
12926 if (ctxt->lastError.code != XML_ERR_OK)
12927 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12928
12929 ctxt->sax = oldsax;
12930 ctxt->dict = NULL;
12931 ctxt->attsDefault = NULL;
12932 ctxt->attsSpecial = NULL;
12933 xmlFreeParserCtxt(ctxt);
12934 if (newDoc != NULL) {
12935 xmlFreeDoc(newDoc);
12936 }
12937
12938 return(ret);
12939 }
12940
12941 /**
12942 * xmlParseInNodeContext:
12943 * @node: the context node
12944 * @data: the input string
12945 * @datalen: the input string length in bytes
12946 * @options: a combination of xmlParserOption
12947 * @lst: the return value for the set of parsed nodes
12948 *
12949 * Parse a well-balanced chunk of an XML document
12950 * within the context (DTD, namespaces, etc ...) of the given node.
12951 *
12952 * The allowed sequence for the data is a Well Balanced Chunk defined by
12953 * the content production in the XML grammar:
12954 *
12955 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12956 *
12957 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12958 * error code otherwise
12959 */
12960 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12961 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12962 int options, xmlNodePtr *lst) {
12963 #ifdef SAX2
12964 xmlParserCtxtPtr ctxt;
12965 xmlDocPtr doc = NULL;
12966 xmlNodePtr fake, cur;
12967 int nsnr = 0;
12968
12969 xmlParserErrors ret = XML_ERR_OK;
12970
12971 /*
12972 * check all input parameters, grab the document
12973 */
12974 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12975 return(XML_ERR_INTERNAL_ERROR);
12976 switch (node->type) {
12977 case XML_ELEMENT_NODE:
12978 case XML_ATTRIBUTE_NODE:
12979 case XML_TEXT_NODE:
12980 case XML_CDATA_SECTION_NODE:
12981 case XML_ENTITY_REF_NODE:
12982 case XML_PI_NODE:
12983 case XML_COMMENT_NODE:
12984 case XML_DOCUMENT_NODE:
12985 case XML_HTML_DOCUMENT_NODE:
12986 break;
12987 default:
12988 return(XML_ERR_INTERNAL_ERROR);
12989
12990 }
12991 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12992 (node->type != XML_DOCUMENT_NODE) &&
12993 (node->type != XML_HTML_DOCUMENT_NODE))
12994 node = node->parent;
12995 if (node == NULL)
12996 return(XML_ERR_INTERNAL_ERROR);
12997 if (node->type == XML_ELEMENT_NODE)
12998 doc = node->doc;
12999 else
13000 doc = (xmlDocPtr) node;
13001 if (doc == NULL)
13002 return(XML_ERR_INTERNAL_ERROR);
13003
13004 /*
13005 * allocate a context and set-up everything not related to the
13006 * node position in the tree
13007 */
13008 if (doc->type == XML_DOCUMENT_NODE)
13009 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13010 #ifdef LIBXML_HTML_ENABLED
13011 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13012 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13013 /*
13014 * When parsing in context, it makes no sense to add implied
13015 * elements like html/body/etc...
13016 */
13017 options |= HTML_PARSE_NOIMPLIED;
13018 }
13019 #endif
13020 else
13021 return(XML_ERR_INTERNAL_ERROR);
13022
13023 if (ctxt == NULL)
13024 return(XML_ERR_NO_MEMORY);
13025
13026 /*
13027 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13028 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13029 * we must wait until the last moment to free the original one.
13030 */
13031 if (doc->dict != NULL) {
13032 if (ctxt->dict != NULL)
13033 xmlDictFree(ctxt->dict);
13034 ctxt->dict = doc->dict;
13035 } else
13036 options |= XML_PARSE_NODICT;
13037
13038 if (doc->encoding != NULL) {
13039 xmlCharEncodingHandlerPtr hdlr;
13040
13041 if (ctxt->encoding != NULL)
13042 xmlFree((xmlChar *) ctxt->encoding);
13043 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13044
13045 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13046 if (hdlr != NULL) {
13047 xmlSwitchToEncoding(ctxt, hdlr);
13048 } else {
13049 return(XML_ERR_UNSUPPORTED_ENCODING);
13050 }
13051 }
13052
13053 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13054 xmlDetectSAX2(ctxt);
13055 ctxt->myDoc = doc;
13056
13057 fake = xmlNewComment(NULL);
13058 if (fake == NULL) {
13059 xmlFreeParserCtxt(ctxt);
13060 return(XML_ERR_NO_MEMORY);
13061 }
13062 xmlAddChild(node, fake);
13063
13064 if (node->type == XML_ELEMENT_NODE) {
13065 nodePush(ctxt, node);
13066 /*
13067 * initialize the SAX2 namespaces stack
13068 */
13069 cur = node;
13070 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13071 xmlNsPtr ns = cur->nsDef;
13072 const xmlChar *iprefix, *ihref;
13073
13074 while (ns != NULL) {
13075 if (ctxt->dict) {
13076 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13077 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13078 } else {
13079 iprefix = ns->prefix;
13080 ihref = ns->href;
13081 }
13082
13083 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13084 nsPush(ctxt, iprefix, ihref);
13085 nsnr++;
13086 }
13087 ns = ns->next;
13088 }
13089 cur = cur->parent;
13090 }
13091 ctxt->instate = XML_PARSER_CONTENT;
13092 }
13093
13094 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13095 /*
13096 * ID/IDREF registration will be done in xmlValidateElement below
13097 */
13098 ctxt->loadsubset |= XML_SKIP_IDS;
13099 }
13100
13101 #ifdef LIBXML_HTML_ENABLED
13102 if (doc->type == XML_HTML_DOCUMENT_NODE)
13103 __htmlParseContent(ctxt);
13104 else
13105 #endif
13106 xmlParseContent(ctxt);
13107
13108 nsPop(ctxt, nsnr);
13109 if ((RAW == '<') && (NXT(1) == '/')) {
13110 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13111 } else if (RAW != 0) {
13112 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13113 }
13114 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13115 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13116 ctxt->wellFormed = 0;
13117 }
13118
13119 if (!ctxt->wellFormed) {
13120 if (ctxt->errNo == 0)
13121 ret = XML_ERR_INTERNAL_ERROR;
13122 else
13123 ret = (xmlParserErrors)ctxt->errNo;
13124 } else {
13125 ret = XML_ERR_OK;
13126 }
13127
13128 /*
13129 * Return the newly created nodeset after unlinking it from
13130 * the pseudo sibling.
13131 */
13132
13133 cur = fake->next;
13134 fake->next = NULL;
13135 node->last = fake;
13136
13137 if (cur != NULL) {
13138 cur->prev = NULL;
13139 }
13140
13141 *lst = cur;
13142
13143 while (cur != NULL) {
13144 cur->parent = NULL;
13145 cur = cur->next;
13146 }
13147
13148 xmlUnlinkNode(fake);
13149 xmlFreeNode(fake);
13150
13151
13152 if (ret != XML_ERR_OK) {
13153 xmlFreeNodeList(*lst);
13154 *lst = NULL;
13155 }
13156
13157 if (doc->dict != NULL)
13158 ctxt->dict = NULL;
13159 xmlFreeParserCtxt(ctxt);
13160
13161 return(ret);
13162 #else /* !SAX2 */
13163 return(XML_ERR_INTERNAL_ERROR);
13164 #endif
13165 }
13166
13167 #ifdef LIBXML_SAX1_ENABLED
13168 /**
13169 * xmlParseBalancedChunkMemoryRecover:
13170 * @doc: the document the chunk pertains to
13171 * @sax: the SAX handler bloc (possibly NULL)
13172 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13173 * @depth: Used for loop detection, use 0
13174 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13175 * @lst: the return value for the set of parsed nodes
13176 * @recover: return nodes even if the data is broken (use 0)
13177 *
13178 *
13179 * Parse a well-balanced chunk of an XML document
13180 * called by the parser
13181 * The allowed sequence for the Well Balanced Chunk is the one defined by
13182 * the content production in the XML grammar:
13183 *
13184 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13185 *
13186 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13187 * the parser error code otherwise
13188 *
13189 * In case recover is set to 1, the nodelist will not be empty even if
13190 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13191 * some extent.
13192 */
13193 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13194 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13195 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13196 int recover) {
13197 xmlParserCtxtPtr ctxt;
13198 xmlDocPtr newDoc;
13199 xmlSAXHandlerPtr oldsax = NULL;
13200 xmlNodePtr content, newRoot;
13201 int size;
13202 int ret = 0;
13203
13204 if (depth > 40) {
13205 return(XML_ERR_ENTITY_LOOP);
13206 }
13207
13208
13209 if (lst != NULL)
13210 *lst = NULL;
13211 if (string == NULL)
13212 return(-1);
13213
13214 size = xmlStrlen(string);
13215
13216 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13217 if (ctxt == NULL) return(-1);
13218 ctxt->userData = ctxt;
13219 if (sax != NULL) {
13220 oldsax = ctxt->sax;
13221 ctxt->sax = sax;
13222 if (user_data != NULL)
13223 ctxt->userData = user_data;
13224 }
13225 newDoc = xmlNewDoc(BAD_CAST "1.0");
13226 if (newDoc == NULL) {
13227 xmlFreeParserCtxt(ctxt);
13228 return(-1);
13229 }
13230 newDoc->properties = XML_DOC_INTERNAL;
13231 if ((doc != NULL) && (doc->dict != NULL)) {
13232 xmlDictFree(ctxt->dict);
13233 ctxt->dict = doc->dict;
13234 xmlDictReference(ctxt->dict);
13235 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13236 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13237 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13238 ctxt->dictNames = 1;
13239 } else {
13240 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13241 }
13242 if (doc != NULL) {
13243 newDoc->intSubset = doc->intSubset;
13244 newDoc->extSubset = doc->extSubset;
13245 }
13246 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13247 if (newRoot == NULL) {
13248 if (sax != NULL)
13249 ctxt->sax = oldsax;
13250 xmlFreeParserCtxt(ctxt);
13251 newDoc->intSubset = NULL;
13252 newDoc->extSubset = NULL;
13253 xmlFreeDoc(newDoc);
13254 return(-1);
13255 }
13256 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13257 nodePush(ctxt, newRoot);
13258 if (doc == NULL) {
13259 ctxt->myDoc = newDoc;
13260 } else {
13261 ctxt->myDoc = newDoc;
13262 newDoc->children->doc = doc;
13263 /* Ensure that doc has XML spec namespace */
13264 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13265 newDoc->oldNs = doc->oldNs;
13266 }
13267 ctxt->instate = XML_PARSER_CONTENT;
13268 ctxt->depth = depth;
13269
13270 /*
13271 * Doing validity checking on chunk doesn't make sense
13272 */
13273 ctxt->validate = 0;
13274 ctxt->loadsubset = 0;
13275 xmlDetectSAX2(ctxt);
13276
13277 if ( doc != NULL ){
13278 content = doc->children;
13279 doc->children = NULL;
13280 xmlParseContent(ctxt);
13281 doc->children = content;
13282 }
13283 else {
13284 xmlParseContent(ctxt);
13285 }
13286 if ((RAW == '<') && (NXT(1) == '/')) {
13287 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13288 } else if (RAW != 0) {
13289 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13290 }
13291 if (ctxt->node != newDoc->children) {
13292 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13293 }
13294
13295 if (!ctxt->wellFormed) {
13296 if (ctxt->errNo == 0)
13297 ret = 1;
13298 else
13299 ret = ctxt->errNo;
13300 } else {
13301 ret = 0;
13302 }
13303
13304 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13305 xmlNodePtr cur;
13306
13307 /*
13308 * Return the newly created nodeset after unlinking it from
13309 * they pseudo parent.
13310 */
13311 cur = newDoc->children->children;
13312 *lst = cur;
13313 while (cur != NULL) {
13314 xmlSetTreeDoc(cur, doc);
13315 cur->parent = NULL;
13316 cur = cur->next;
13317 }
13318 newDoc->children->children = NULL;
13319 }
13320
13321 if (sax != NULL)
13322 ctxt->sax = oldsax;
13323 xmlFreeParserCtxt(ctxt);
13324 newDoc->intSubset = NULL;
13325 newDoc->extSubset = NULL;
13326 newDoc->oldNs = NULL;
13327 xmlFreeDoc(newDoc);
13328
13329 return(ret);
13330 }
13331
13332 /**
13333 * xmlSAXParseEntity:
13334 * @sax: the SAX handler block
13335 * @filename: the filename
13336 *
13337 * parse an XML external entity out of context and build a tree.
13338 * It use the given SAX function block to handle the parsing callback.
13339 * If sax is NULL, fallback to the default DOM tree building routines.
13340 *
13341 * [78] extParsedEnt ::= TextDecl? content
13342 *
13343 * This correspond to a "Well Balanced" chunk
13344 *
13345 * Returns the resulting document tree
13346 */
13347
13348 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13349 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13350 xmlDocPtr ret;
13351 xmlParserCtxtPtr ctxt;
13352
13353 ctxt = xmlCreateFileParserCtxt(filename);
13354 if (ctxt == NULL) {
13355 return(NULL);
13356 }
13357 if (sax != NULL) {
13358 if (ctxt->sax != NULL)
13359 xmlFree(ctxt->sax);
13360 ctxt->sax = sax;
13361 ctxt->userData = NULL;
13362 }
13363
13364 xmlParseExtParsedEnt(ctxt);
13365
13366 if (ctxt->wellFormed)
13367 ret = ctxt->myDoc;
13368 else {
13369 ret = NULL;
13370 xmlFreeDoc(ctxt->myDoc);
13371 ctxt->myDoc = NULL;
13372 }
13373 if (sax != NULL)
13374 ctxt->sax = NULL;
13375 xmlFreeParserCtxt(ctxt);
13376
13377 return(ret);
13378 }
13379
13380 /**
13381 * xmlParseEntity:
13382 * @filename: the filename
13383 *
13384 * parse an XML external entity out of context and build a tree.
13385 *
13386 * [78] extParsedEnt ::= TextDecl? content
13387 *
13388 * This correspond to a "Well Balanced" chunk
13389 *
13390 * Returns the resulting document tree
13391 */
13392
13393 xmlDocPtr
xmlParseEntity(const char * filename)13394 xmlParseEntity(const char *filename) {
13395 return(xmlSAXParseEntity(NULL, filename));
13396 }
13397 #endif /* LIBXML_SAX1_ENABLED */
13398
13399 /**
13400 * xmlCreateEntityParserCtxtInternal:
13401 * @URL: the entity URL
13402 * @ID: the entity PUBLIC ID
13403 * @base: a possible base for the target URI
13404 * @pctx: parser context used to set options on new context
13405 *
13406 * Create a parser context for an external entity
13407 * Automatic support for ZLIB/Compress compressed document is provided
13408 * by default if found at compile-time.
13409 *
13410 * Returns the new parser context or NULL
13411 */
13412 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13413 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13414 const xmlChar *base, xmlParserCtxtPtr pctx) {
13415 xmlParserCtxtPtr ctxt;
13416 xmlParserInputPtr inputStream;
13417 char *directory = NULL;
13418 xmlChar *uri;
13419
13420 ctxt = xmlNewParserCtxt();
13421 if (ctxt == NULL) {
13422 return(NULL);
13423 }
13424
13425 if (pctx != NULL) {
13426 ctxt->options = pctx->options;
13427 ctxt->_private = pctx->_private;
13428 }
13429
13430 uri = xmlBuildURI(URL, base);
13431
13432 if (uri == NULL) {
13433 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13434 if (inputStream == NULL) {
13435 xmlFreeParserCtxt(ctxt);
13436 return(NULL);
13437 }
13438
13439 inputPush(ctxt, inputStream);
13440
13441 if ((ctxt->directory == NULL) && (directory == NULL))
13442 directory = xmlParserGetDirectory((char *)URL);
13443 if ((ctxt->directory == NULL) && (directory != NULL))
13444 ctxt->directory = directory;
13445 } else {
13446 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13447 if (inputStream == NULL) {
13448 xmlFree(uri);
13449 xmlFreeParserCtxt(ctxt);
13450 return(NULL);
13451 }
13452
13453 inputPush(ctxt, inputStream);
13454
13455 if ((ctxt->directory == NULL) && (directory == NULL))
13456 directory = xmlParserGetDirectory((char *)uri);
13457 if ((ctxt->directory == NULL) && (directory != NULL))
13458 ctxt->directory = directory;
13459 xmlFree(uri);
13460 }
13461 return(ctxt);
13462 }
13463
13464 /**
13465 * xmlCreateEntityParserCtxt:
13466 * @URL: the entity URL
13467 * @ID: the entity PUBLIC ID
13468 * @base: a possible base for the target URI
13469 *
13470 * Create a parser context for an external entity
13471 * Automatic support for ZLIB/Compress compressed document is provided
13472 * by default if found at compile-time.
13473 *
13474 * Returns the new parser context or NULL
13475 */
13476 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13477 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13478 const xmlChar *base) {
13479 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13480
13481 }
13482
13483 /************************************************************************
13484 * *
13485 * Front ends when parsing from a file *
13486 * *
13487 ************************************************************************/
13488
13489 /**
13490 * xmlCreateURLParserCtxt:
13491 * @filename: the filename or URL
13492 * @options: a combination of xmlParserOption
13493 *
13494 * Create a parser context for a file or URL content.
13495 * Automatic support for ZLIB/Compress compressed document is provided
13496 * by default if found at compile-time and for file accesses
13497 *
13498 * Returns the new parser context or NULL
13499 */
13500 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13501 xmlCreateURLParserCtxt(const char *filename, int options)
13502 {
13503 xmlParserCtxtPtr ctxt;
13504 xmlParserInputPtr inputStream;
13505 char *directory = NULL;
13506
13507 ctxt = xmlNewParserCtxt();
13508 if (ctxt == NULL) {
13509 xmlErrMemory(NULL, "cannot allocate parser context");
13510 return(NULL);
13511 }
13512
13513 if (options)
13514 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13515 ctxt->linenumbers = 1;
13516
13517 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13518 if (inputStream == NULL) {
13519 xmlFreeParserCtxt(ctxt);
13520 return(NULL);
13521 }
13522
13523 inputPush(ctxt, inputStream);
13524 if ((ctxt->directory == NULL) && (directory == NULL))
13525 directory = xmlParserGetDirectory(filename);
13526 if ((ctxt->directory == NULL) && (directory != NULL))
13527 ctxt->directory = directory;
13528
13529 return(ctxt);
13530 }
13531
13532 /**
13533 * xmlCreateFileParserCtxt:
13534 * @filename: the filename
13535 *
13536 * Create a parser context for a file content.
13537 * Automatic support for ZLIB/Compress compressed document is provided
13538 * by default if found at compile-time.
13539 *
13540 * Returns the new parser context or NULL
13541 */
13542 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13543 xmlCreateFileParserCtxt(const char *filename)
13544 {
13545 return(xmlCreateURLParserCtxt(filename, 0));
13546 }
13547
13548 #ifdef LIBXML_SAX1_ENABLED
13549 /**
13550 * xmlSAXParseFileWithData:
13551 * @sax: the SAX handler block
13552 * @filename: the filename
13553 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13554 * documents
13555 * @data: the userdata
13556 *
13557 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13558 * compressed document is provided by default if found at compile-time.
13559 * It use the given SAX function block to handle the parsing callback.
13560 * If sax is NULL, fallback to the default DOM tree building routines.
13561 *
13562 * User data (void *) is stored within the parser context in the
13563 * context's _private member, so it is available nearly everywhere in libxml
13564 *
13565 * Returns the resulting document tree
13566 */
13567
13568 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13569 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13570 int recovery, void *data) {
13571 xmlDocPtr ret;
13572 xmlParserCtxtPtr ctxt;
13573
13574 xmlInitParser();
13575
13576 ctxt = xmlCreateFileParserCtxt(filename);
13577 if (ctxt == NULL) {
13578 return(NULL);
13579 }
13580 if (sax != NULL) {
13581 if (ctxt->sax != NULL)
13582 xmlFree(ctxt->sax);
13583 ctxt->sax = sax;
13584 }
13585 xmlDetectSAX2(ctxt);
13586 if (data!=NULL) {
13587 ctxt->_private = data;
13588 }
13589
13590 if (ctxt->directory == NULL)
13591 ctxt->directory = xmlParserGetDirectory(filename);
13592
13593 ctxt->recovery = recovery;
13594
13595 xmlParseDocument(ctxt);
13596
13597 if ((ctxt->wellFormed) || recovery) {
13598 ret = ctxt->myDoc;
13599 if (ret != NULL) {
13600 if (ctxt->input->buf->compressed > 0)
13601 ret->compression = 9;
13602 else
13603 ret->compression = ctxt->input->buf->compressed;
13604 }
13605 }
13606 else {
13607 ret = NULL;
13608 xmlFreeDoc(ctxt->myDoc);
13609 ctxt->myDoc = NULL;
13610 }
13611 if (sax != NULL)
13612 ctxt->sax = NULL;
13613 xmlFreeParserCtxt(ctxt);
13614
13615 return(ret);
13616 }
13617
13618 /**
13619 * xmlSAXParseFile:
13620 * @sax: the SAX handler block
13621 * @filename: the filename
13622 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13623 * documents
13624 *
13625 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13626 * compressed document is provided by default if found at compile-time.
13627 * It use the given SAX function block to handle the parsing callback.
13628 * If sax is NULL, fallback to the default DOM tree building routines.
13629 *
13630 * Returns the resulting document tree
13631 */
13632
13633 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13634 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13635 int recovery) {
13636 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13637 }
13638
13639 /**
13640 * xmlRecoverDoc:
13641 * @cur: a pointer to an array of xmlChar
13642 *
13643 * parse an XML in-memory document and build a tree.
13644 * In the case the document is not Well Formed, a attempt to build a
13645 * tree is tried anyway
13646 *
13647 * Returns the resulting document tree or NULL in case of failure
13648 */
13649
13650 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13651 xmlRecoverDoc(const xmlChar *cur) {
13652 return(xmlSAXParseDoc(NULL, cur, 1));
13653 }
13654
13655 /**
13656 * xmlParseFile:
13657 * @filename: the filename
13658 *
13659 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13660 * compressed document is provided by default if found at compile-time.
13661 *
13662 * Returns the resulting document tree if the file was wellformed,
13663 * NULL otherwise.
13664 */
13665
13666 xmlDocPtr
xmlParseFile(const char * filename)13667 xmlParseFile(const char *filename) {
13668 return(xmlSAXParseFile(NULL, filename, 0));
13669 }
13670
13671 /**
13672 * xmlRecoverFile:
13673 * @filename: the filename
13674 *
13675 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13676 * compressed document is provided by default if found at compile-time.
13677 * In the case the document is not Well Formed, it attempts to build
13678 * a tree anyway
13679 *
13680 * Returns the resulting document tree or NULL in case of failure
13681 */
13682
13683 xmlDocPtr
xmlRecoverFile(const char * filename)13684 xmlRecoverFile(const char *filename) {
13685 return(xmlSAXParseFile(NULL, filename, 1));
13686 }
13687
13688
13689 /**
13690 * xmlSetupParserForBuffer:
13691 * @ctxt: an XML parser context
13692 * @buffer: a xmlChar * buffer
13693 * @filename: a file name
13694 *
13695 * Setup the parser context to parse a new buffer; Clears any prior
13696 * contents from the parser context. The buffer parameter must not be
13697 * NULL, but the filename parameter can be
13698 */
13699 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13700 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13701 const char* filename)
13702 {
13703 xmlParserInputPtr input;
13704
13705 if ((ctxt == NULL) || (buffer == NULL))
13706 return;
13707
13708 input = xmlNewInputStream(ctxt);
13709 if (input == NULL) {
13710 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13711 xmlClearParserCtxt(ctxt);
13712 return;
13713 }
13714
13715 xmlClearParserCtxt(ctxt);
13716 if (filename != NULL)
13717 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13718 input->base = buffer;
13719 input->cur = buffer;
13720 input->end = &buffer[xmlStrlen(buffer)];
13721 inputPush(ctxt, input);
13722 }
13723
13724 /**
13725 * xmlSAXUserParseFile:
13726 * @sax: a SAX handler
13727 * @user_data: The user data returned on SAX callbacks
13728 * @filename: a file name
13729 *
13730 * parse an XML file and call the given SAX handler routines.
13731 * Automatic support for ZLIB/Compress compressed document is provided
13732 *
13733 * Returns 0 in case of success or a error number otherwise
13734 */
13735 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13736 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13737 const char *filename) {
13738 int ret = 0;
13739 xmlParserCtxtPtr ctxt;
13740
13741 ctxt = xmlCreateFileParserCtxt(filename);
13742 if (ctxt == NULL) return -1;
13743 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13744 xmlFree(ctxt->sax);
13745 ctxt->sax = sax;
13746 xmlDetectSAX2(ctxt);
13747
13748 if (user_data != NULL)
13749 ctxt->userData = user_data;
13750
13751 xmlParseDocument(ctxt);
13752
13753 if (ctxt->wellFormed)
13754 ret = 0;
13755 else {
13756 if (ctxt->errNo != 0)
13757 ret = ctxt->errNo;
13758 else
13759 ret = -1;
13760 }
13761 if (sax != NULL)
13762 ctxt->sax = NULL;
13763 if (ctxt->myDoc != NULL) {
13764 xmlFreeDoc(ctxt->myDoc);
13765 ctxt->myDoc = NULL;
13766 }
13767 xmlFreeParserCtxt(ctxt);
13768
13769 return ret;
13770 }
13771 #endif /* LIBXML_SAX1_ENABLED */
13772
13773 /************************************************************************
13774 * *
13775 * Front ends when parsing from memory *
13776 * *
13777 ************************************************************************/
13778
13779 /**
13780 * xmlCreateMemoryParserCtxt:
13781 * @buffer: a pointer to a char array
13782 * @size: the size of the array
13783 *
13784 * Create a parser context for an XML in-memory document.
13785 *
13786 * Returns the new parser context or NULL
13787 */
13788 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13789 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13790 xmlParserCtxtPtr ctxt;
13791 xmlParserInputPtr input;
13792 xmlParserInputBufferPtr buf;
13793
13794 if (buffer == NULL)
13795 return(NULL);
13796 if (size <= 0)
13797 return(NULL);
13798
13799 ctxt = xmlNewParserCtxt();
13800 if (ctxt == NULL)
13801 return(NULL);
13802
13803 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13804 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13805 if (buf == NULL) {
13806 xmlFreeParserCtxt(ctxt);
13807 return(NULL);
13808 }
13809
13810 input = xmlNewInputStream(ctxt);
13811 if (input == NULL) {
13812 xmlFreeParserInputBuffer(buf);
13813 xmlFreeParserCtxt(ctxt);
13814 return(NULL);
13815 }
13816
13817 input->filename = NULL;
13818 input->buf = buf;
13819 input->base = input->buf->buffer->content;
13820 input->cur = input->buf->buffer->content;
13821 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13822
13823 inputPush(ctxt, input);
13824 return(ctxt);
13825 }
13826
13827 #ifdef LIBXML_SAX1_ENABLED
13828 /**
13829 * xmlSAXParseMemoryWithData:
13830 * @sax: the SAX handler block
13831 * @buffer: an pointer to a char array
13832 * @size: the size of the array
13833 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13834 * documents
13835 * @data: the userdata
13836 *
13837 * parse an XML in-memory block and use the given SAX function block
13838 * to handle the parsing callback. If sax is NULL, fallback to the default
13839 * DOM tree building routines.
13840 *
13841 * User data (void *) is stored within the parser context in the
13842 * context's _private member, so it is available nearly everywhere in libxml
13843 *
13844 * Returns the resulting document tree
13845 */
13846
13847 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13848 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13849 int size, int recovery, void *data) {
13850 xmlDocPtr ret;
13851 xmlParserCtxtPtr ctxt;
13852
13853 xmlInitParser();
13854
13855 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13856 if (ctxt == NULL) return(NULL);
13857 if (sax != NULL) {
13858 if (ctxt->sax != NULL)
13859 xmlFree(ctxt->sax);
13860 ctxt->sax = sax;
13861 }
13862 xmlDetectSAX2(ctxt);
13863 if (data!=NULL) {
13864 ctxt->_private=data;
13865 }
13866
13867 ctxt->recovery = recovery;
13868
13869 xmlParseDocument(ctxt);
13870
13871 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13872 else {
13873 ret = NULL;
13874 xmlFreeDoc(ctxt->myDoc);
13875 ctxt->myDoc = NULL;
13876 }
13877 if (sax != NULL)
13878 ctxt->sax = NULL;
13879 xmlFreeParserCtxt(ctxt);
13880
13881 return(ret);
13882 }
13883
13884 /**
13885 * xmlSAXParseMemory:
13886 * @sax: the SAX handler block
13887 * @buffer: an pointer to a char array
13888 * @size: the size of the array
13889 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13890 * documents
13891 *
13892 * parse an XML in-memory block and use the given SAX function block
13893 * to handle the parsing callback. If sax is NULL, fallback to the default
13894 * DOM tree building routines.
13895 *
13896 * Returns the resulting document tree
13897 */
13898 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13899 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13900 int size, int recovery) {
13901 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13902 }
13903
13904 /**
13905 * xmlParseMemory:
13906 * @buffer: an pointer to a char array
13907 * @size: the size of the array
13908 *
13909 * parse an XML in-memory block and build a tree.
13910 *
13911 * Returns the resulting document tree
13912 */
13913
xmlParseMemory(const char * buffer,int size)13914 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13915 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13916 }
13917
13918 /**
13919 * xmlRecoverMemory:
13920 * @buffer: an pointer to a char array
13921 * @size: the size of the array
13922 *
13923 * parse an XML in-memory block and build a tree.
13924 * In the case the document is not Well Formed, an attempt to
13925 * build a tree is tried anyway
13926 *
13927 * Returns the resulting document tree or NULL in case of error
13928 */
13929
xmlRecoverMemory(const char * buffer,int size)13930 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13931 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13932 }
13933
13934 /**
13935 * xmlSAXUserParseMemory:
13936 * @sax: a SAX handler
13937 * @user_data: The user data returned on SAX callbacks
13938 * @buffer: an in-memory XML document input
13939 * @size: the length of the XML document in bytes
13940 *
13941 * A better SAX parsing routine.
13942 * parse an XML in-memory buffer and call the given SAX handler routines.
13943 *
13944 * Returns 0 in case of success or a error number otherwise
13945 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13946 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13947 const char *buffer, int size) {
13948 int ret = 0;
13949 xmlParserCtxtPtr ctxt;
13950
13951 xmlInitParser();
13952
13953 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13954 if (ctxt == NULL) return -1;
13955 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13956 xmlFree(ctxt->sax);
13957 ctxt->sax = sax;
13958 xmlDetectSAX2(ctxt);
13959
13960 if (user_data != NULL)
13961 ctxt->userData = user_data;
13962
13963 xmlParseDocument(ctxt);
13964
13965 if (ctxt->wellFormed)
13966 ret = 0;
13967 else {
13968 if (ctxt->errNo != 0)
13969 ret = ctxt->errNo;
13970 else
13971 ret = -1;
13972 }
13973 if (sax != NULL)
13974 ctxt->sax = NULL;
13975 if (ctxt->myDoc != NULL) {
13976 xmlFreeDoc(ctxt->myDoc);
13977 ctxt->myDoc = NULL;
13978 }
13979 xmlFreeParserCtxt(ctxt);
13980
13981 return ret;
13982 }
13983 #endif /* LIBXML_SAX1_ENABLED */
13984
13985 /**
13986 * xmlCreateDocParserCtxt:
13987 * @cur: a pointer to an array of xmlChar
13988 *
13989 * Creates a parser context for an XML in-memory document.
13990 *
13991 * Returns the new parser context or NULL
13992 */
13993 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)13994 xmlCreateDocParserCtxt(const xmlChar *cur) {
13995 int len;
13996
13997 if (cur == NULL)
13998 return(NULL);
13999 len = xmlStrlen(cur);
14000 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14001 }
14002
14003 #ifdef LIBXML_SAX1_ENABLED
14004 /**
14005 * xmlSAXParseDoc:
14006 * @sax: the SAX handler block
14007 * @cur: a pointer to an array of xmlChar
14008 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14009 * documents
14010 *
14011 * parse an XML in-memory document and build a tree.
14012 * It use the given SAX function block to handle the parsing callback.
14013 * If sax is NULL, fallback to the default DOM tree building routines.
14014 *
14015 * Returns the resulting document tree
14016 */
14017
14018 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14019 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14020 xmlDocPtr ret;
14021 xmlParserCtxtPtr ctxt;
14022 xmlSAXHandlerPtr oldsax = NULL;
14023
14024 if (cur == NULL) return(NULL);
14025
14026
14027 ctxt = xmlCreateDocParserCtxt(cur);
14028 if (ctxt == NULL) return(NULL);
14029 if (sax != NULL) {
14030 oldsax = ctxt->sax;
14031 ctxt->sax = sax;
14032 ctxt->userData = NULL;
14033 }
14034 xmlDetectSAX2(ctxt);
14035
14036 xmlParseDocument(ctxt);
14037 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14038 else {
14039 ret = NULL;
14040 xmlFreeDoc(ctxt->myDoc);
14041 ctxt->myDoc = NULL;
14042 }
14043 if (sax != NULL)
14044 ctxt->sax = oldsax;
14045 xmlFreeParserCtxt(ctxt);
14046
14047 return(ret);
14048 }
14049
14050 /**
14051 * xmlParseDoc:
14052 * @cur: a pointer to an array of xmlChar
14053 *
14054 * parse an XML in-memory document and build a tree.
14055 *
14056 * Returns the resulting document tree
14057 */
14058
14059 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14060 xmlParseDoc(const xmlChar *cur) {
14061 return(xmlSAXParseDoc(NULL, cur, 0));
14062 }
14063 #endif /* LIBXML_SAX1_ENABLED */
14064
14065 #ifdef LIBXML_LEGACY_ENABLED
14066 /************************************************************************
14067 * *
14068 * Specific function to keep track of entities references *
14069 * and used by the XSLT debugger *
14070 * *
14071 ************************************************************************/
14072
14073 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14074
14075 /**
14076 * xmlAddEntityReference:
14077 * @ent : A valid entity
14078 * @firstNode : A valid first node for children of entity
14079 * @lastNode : A valid last node of children entity
14080 *
14081 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14082 */
14083 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14084 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14085 xmlNodePtr lastNode)
14086 {
14087 if (xmlEntityRefFunc != NULL) {
14088 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14089 }
14090 }
14091
14092
14093 /**
14094 * xmlSetEntityReferenceFunc:
14095 * @func: A valid function
14096 *
14097 * Set the function to call call back when a xml reference has been made
14098 */
14099 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14100 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14101 {
14102 xmlEntityRefFunc = func;
14103 }
14104 #endif /* LIBXML_LEGACY_ENABLED */
14105
14106 /************************************************************************
14107 * *
14108 * Miscellaneous *
14109 * *
14110 ************************************************************************/
14111
14112 #ifdef LIBXML_XPATH_ENABLED
14113 #include <libxml/xpath.h>
14114 #endif
14115
14116 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14117 static int xmlParserInitialized = 0;
14118
14119 /**
14120 * xmlInitParser:
14121 *
14122 * Initialization function for the XML parser.
14123 * This is not reentrant. Call once before processing in case of
14124 * use in multithreaded programs.
14125 */
14126
14127 void
xmlInitParser(void)14128 xmlInitParser(void) {
14129 if (xmlParserInitialized != 0)
14130 return;
14131
14132 #ifdef LIBXML_THREAD_ENABLED
14133 __xmlGlobalInitMutexLock();
14134 if (xmlParserInitialized == 0) {
14135 #endif
14136 xmlInitThreads();
14137 xmlInitGlobals();
14138 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14139 (xmlGenericError == NULL))
14140 initGenericErrorDefaultFunc(NULL);
14141 xmlInitMemory();
14142 xmlInitCharEncodingHandlers();
14143 xmlDefaultSAXHandlerInit();
14144 xmlRegisterDefaultInputCallbacks();
14145 #ifdef LIBXML_OUTPUT_ENABLED
14146 xmlRegisterDefaultOutputCallbacks();
14147 #endif /* LIBXML_OUTPUT_ENABLED */
14148 #ifdef LIBXML_HTML_ENABLED
14149 htmlInitAutoClose();
14150 htmlDefaultSAXHandlerInit();
14151 #endif
14152 #ifdef LIBXML_XPATH_ENABLED
14153 xmlXPathInit();
14154 #endif
14155 xmlParserInitialized = 1;
14156 #ifdef LIBXML_THREAD_ENABLED
14157 }
14158 __xmlGlobalInitMutexUnlock();
14159 #endif
14160 }
14161
14162 /**
14163 * xmlCleanupParser:
14164 *
14165 * This function name is somewhat misleading. It does not clean up
14166 * parser state, it cleans up memory allocated by the library itself.
14167 * It is a cleanup function for the XML library. It tries to reclaim all
14168 * related global memory allocated for the library processing.
14169 * It doesn't deallocate any document related memory. One should
14170 * call xmlCleanupParser() only when the process has finished using
14171 * the library and all XML/HTML documents built with it.
14172 * See also xmlInitParser() which has the opposite function of preparing
14173 * the library for operations.
14174 *
14175 * WARNING: if your application is multithreaded or has plugin support
14176 * calling this may crash the application if another thread or
14177 * a plugin is still using libxml2. It's sometimes very hard to
14178 * guess if libxml2 is in use in the application, some libraries
14179 * or plugins may use it without notice. In case of doubt abstain
14180 * from calling this function or do it just before calling exit()
14181 * to avoid leak reports from valgrind !
14182 */
14183
14184 void
xmlCleanupParser(void)14185 xmlCleanupParser(void) {
14186 if (!xmlParserInitialized)
14187 return;
14188
14189 xmlCleanupCharEncodingHandlers();
14190 #ifdef LIBXML_CATALOG_ENABLED
14191 xmlCatalogCleanup();
14192 #endif
14193 xmlDictCleanup();
14194 xmlCleanupInputCallbacks();
14195 #ifdef LIBXML_OUTPUT_ENABLED
14196 xmlCleanupOutputCallbacks();
14197 #endif
14198 #ifdef LIBXML_SCHEMAS_ENABLED
14199 xmlSchemaCleanupTypes();
14200 xmlRelaxNGCleanupTypes();
14201 #endif
14202 xmlCleanupGlobals();
14203 xmlResetLastError();
14204 xmlCleanupThreads(); /* must be last if called not from the main thread */
14205 xmlCleanupMemory();
14206 xmlParserInitialized = 0;
14207 }
14208
14209 /************************************************************************
14210 * *
14211 * New set (2.6.0) of simpler and more flexible APIs *
14212 * *
14213 ************************************************************************/
14214
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is never dictionary owned.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as one statement: it can be followed by a semicolon and used as the
 * body of an unbraced if/else without capturing the else branch.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14226
14227 /**
14228 * xmlCtxtReset:
14229 * @ctxt: an XML parser context
14230 *
14231 * Reset a parser context
14232 */
14233 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14234 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14235 {
14236 xmlParserInputPtr input;
14237 xmlDictPtr dict;
14238
14239 if (ctxt == NULL)
14240 return;
14241
14242 dict = ctxt->dict;
14243
14244 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14245 xmlFreeInputStream(input);
14246 }
14247 ctxt->inputNr = 0;
14248 ctxt->input = NULL;
14249
14250 ctxt->spaceNr = 0;
14251 if (ctxt->spaceTab != NULL) {
14252 ctxt->spaceTab[0] = -1;
14253 ctxt->space = &ctxt->spaceTab[0];
14254 } else {
14255 ctxt->space = NULL;
14256 }
14257
14258
14259 ctxt->nodeNr = 0;
14260 ctxt->node = NULL;
14261
14262 ctxt->nameNr = 0;
14263 ctxt->name = NULL;
14264
14265 DICT_FREE(ctxt->version);
14266 ctxt->version = NULL;
14267 DICT_FREE(ctxt->encoding);
14268 ctxt->encoding = NULL;
14269 DICT_FREE(ctxt->directory);
14270 ctxt->directory = NULL;
14271 DICT_FREE(ctxt->extSubURI);
14272 ctxt->extSubURI = NULL;
14273 DICT_FREE(ctxt->extSubSystem);
14274 ctxt->extSubSystem = NULL;
14275 if (ctxt->myDoc != NULL)
14276 xmlFreeDoc(ctxt->myDoc);
14277 ctxt->myDoc = NULL;
14278
14279 ctxt->standalone = -1;
14280 ctxt->hasExternalSubset = 0;
14281 ctxt->hasPErefs = 0;
14282 ctxt->html = 0;
14283 ctxt->external = 0;
14284 ctxt->instate = XML_PARSER_START;
14285 ctxt->token = 0;
14286
14287 ctxt->wellFormed = 1;
14288 ctxt->nsWellFormed = 1;
14289 ctxt->disableSAX = 0;
14290 ctxt->valid = 1;
14291 #if 0
14292 ctxt->vctxt.userData = ctxt;
14293 ctxt->vctxt.error = xmlParserValidityError;
14294 ctxt->vctxt.warning = xmlParserValidityWarning;
14295 #endif
14296 ctxt->record_info = 0;
14297 ctxt->nbChars = 0;
14298 ctxt->checkIndex = 0;
14299 ctxt->inSubset = 0;
14300 ctxt->errNo = XML_ERR_OK;
14301 ctxt->depth = 0;
14302 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14303 ctxt->catalogs = NULL;
14304 ctxt->nbentities = 0;
14305 ctxt->sizeentities = 0;
14306 xmlInitNodeInfoSeq(&ctxt->node_seq);
14307
14308 if (ctxt->attsDefault != NULL) {
14309 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14310 ctxt->attsDefault = NULL;
14311 }
14312 if (ctxt->attsSpecial != NULL) {
14313 xmlHashFree(ctxt->attsSpecial, NULL);
14314 ctxt->attsSpecial = NULL;
14315 }
14316
14317 #ifdef LIBXML_CATALOG_ENABLED
14318 if (ctxt->catalogs != NULL)
14319 xmlCatalogFreeLocal(ctxt->catalogs);
14320 #endif
14321 if (ctxt->lastError.code != XML_ERR_OK)
14322 xmlResetError(&ctxt->lastError);
14323 }
14324
14325 /**
14326 * xmlCtxtResetPush:
14327 * @ctxt: an XML parser context
14328 * @chunk: a pointer to an array of chars
14329 * @size: number of chars in the array
14330 * @filename: an optional file name or URI
14331 * @encoding: the document encoding, or NULL
14332 *
14333 * Reset a push parser context
14334 *
14335 * Returns 0 in case of success and 1 in case of error
14336 */
14337 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14338 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14339 int size, const char *filename, const char *encoding)
14340 {
14341 xmlParserInputPtr inputStream;
14342 xmlParserInputBufferPtr buf;
14343 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14344
14345 if (ctxt == NULL)
14346 return(1);
14347
14348 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14349 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14350
14351 buf = xmlAllocParserInputBuffer(enc);
14352 if (buf == NULL)
14353 return(1);
14354
14355 if (ctxt == NULL) {
14356 xmlFreeParserInputBuffer(buf);
14357 return(1);
14358 }
14359
14360 xmlCtxtReset(ctxt);
14361
14362 if (ctxt->pushTab == NULL) {
14363 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14364 sizeof(xmlChar *));
14365 if (ctxt->pushTab == NULL) {
14366 xmlErrMemory(ctxt, NULL);
14367 xmlFreeParserInputBuffer(buf);
14368 return(1);
14369 }
14370 }
14371
14372 if (filename == NULL) {
14373 ctxt->directory = NULL;
14374 } else {
14375 ctxt->directory = xmlParserGetDirectory(filename);
14376 }
14377
14378 inputStream = xmlNewInputStream(ctxt);
14379 if (inputStream == NULL) {
14380 xmlFreeParserInputBuffer(buf);
14381 return(1);
14382 }
14383
14384 if (filename == NULL)
14385 inputStream->filename = NULL;
14386 else
14387 inputStream->filename = (char *)
14388 xmlCanonicPath((const xmlChar *) filename);
14389 inputStream->buf = buf;
14390 inputStream->base = inputStream->buf->buffer->content;
14391 inputStream->cur = inputStream->buf->buffer->content;
14392 inputStream->end =
14393 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14394
14395 inputPush(ctxt, inputStream);
14396
14397 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14398 (ctxt->input->buf != NULL)) {
14399 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14400 int cur = ctxt->input->cur - ctxt->input->base;
14401
14402 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14403
14404 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14405 ctxt->input->cur = ctxt->input->base + cur;
14406 ctxt->input->end =
14407 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14408 use];
14409 #ifdef DEBUG_PUSH
14410 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14411 #endif
14412 }
14413
14414 if (encoding != NULL) {
14415 xmlCharEncodingHandlerPtr hdlr;
14416
14417 if (ctxt->encoding != NULL)
14418 xmlFree((xmlChar *) ctxt->encoding);
14419 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14420
14421 hdlr = xmlFindCharEncodingHandler(encoding);
14422 if (hdlr != NULL) {
14423 xmlSwitchToEncoding(ctxt, hdlr);
14424 } else {
14425 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14426 "Unsupported encoding %s\n", BAD_CAST encoding);
14427 }
14428 } else if (enc != XML_CHAR_ENCODING_NONE) {
14429 xmlSwitchEncoding(ctxt, enc);
14430 }
14431
14432 return(0);
14433 }
14434
14435
14436 /**
14437 * xmlCtxtUseOptionsInternal:
14438 * @ctxt: an XML parser context
14439 * @options: a combination of xmlParserOption
14440 * @encoding: the user provided encoding to use
14441 *
14442 * Applies the options to the parser context
14443 *
14444 * Returns 0 in case of success, the set of unknown or unimplemented options
14445 * in case of error.
14446 */
14447 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14448 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14449 {
14450 if (ctxt == NULL)
14451 return(-1);
14452 if (encoding != NULL) {
14453 if (ctxt->encoding != NULL)
14454 xmlFree((xmlChar *) ctxt->encoding);
14455 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14456 }
14457 if (options & XML_PARSE_RECOVER) {
14458 ctxt->recovery = 1;
14459 options -= XML_PARSE_RECOVER;
14460 ctxt->options |= XML_PARSE_RECOVER;
14461 } else
14462 ctxt->recovery = 0;
14463 if (options & XML_PARSE_DTDLOAD) {
14464 ctxt->loadsubset = XML_DETECT_IDS;
14465 options -= XML_PARSE_DTDLOAD;
14466 ctxt->options |= XML_PARSE_DTDLOAD;
14467 } else
14468 ctxt->loadsubset = 0;
14469 if (options & XML_PARSE_DTDATTR) {
14470 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14471 options -= XML_PARSE_DTDATTR;
14472 ctxt->options |= XML_PARSE_DTDATTR;
14473 }
14474 if (options & XML_PARSE_NOENT) {
14475 ctxt->replaceEntities = 1;
14476 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14477 options -= XML_PARSE_NOENT;
14478 ctxt->options |= XML_PARSE_NOENT;
14479 } else
14480 ctxt->replaceEntities = 0;
14481 if (options & XML_PARSE_PEDANTIC) {
14482 ctxt->pedantic = 1;
14483 options -= XML_PARSE_PEDANTIC;
14484 ctxt->options |= XML_PARSE_PEDANTIC;
14485 } else
14486 ctxt->pedantic = 0;
14487 if (options & XML_PARSE_NOBLANKS) {
14488 ctxt->keepBlanks = 0;
14489 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14490 options -= XML_PARSE_NOBLANKS;
14491 ctxt->options |= XML_PARSE_NOBLANKS;
14492 } else
14493 ctxt->keepBlanks = 1;
14494 if (options & XML_PARSE_DTDVALID) {
14495 ctxt->validate = 1;
14496 if (options & XML_PARSE_NOWARNING)
14497 ctxt->vctxt.warning = NULL;
14498 if (options & XML_PARSE_NOERROR)
14499 ctxt->vctxt.error = NULL;
14500 options -= XML_PARSE_DTDVALID;
14501 ctxt->options |= XML_PARSE_DTDVALID;
14502 } else
14503 ctxt->validate = 0;
14504 if (options & XML_PARSE_NOWARNING) {
14505 ctxt->sax->warning = NULL;
14506 options -= XML_PARSE_NOWARNING;
14507 }
14508 if (options & XML_PARSE_NOERROR) {
14509 ctxt->sax->error = NULL;
14510 ctxt->sax->fatalError = NULL;
14511 options -= XML_PARSE_NOERROR;
14512 }
14513 #ifdef LIBXML_SAX1_ENABLED
14514 if (options & XML_PARSE_SAX1) {
14515 ctxt->sax->startElement = xmlSAX2StartElement;
14516 ctxt->sax->endElement = xmlSAX2EndElement;
14517 ctxt->sax->startElementNs = NULL;
14518 ctxt->sax->endElementNs = NULL;
14519 ctxt->sax->initialized = 1;
14520 options -= XML_PARSE_SAX1;
14521 ctxt->options |= XML_PARSE_SAX1;
14522 }
14523 #endif /* LIBXML_SAX1_ENABLED */
14524 if (options & XML_PARSE_NODICT) {
14525 ctxt->dictNames = 0;
14526 options -= XML_PARSE_NODICT;
14527 ctxt->options |= XML_PARSE_NODICT;
14528 } else {
14529 ctxt->dictNames = 1;
14530 }
14531 if (options & XML_PARSE_NOCDATA) {
14532 ctxt->sax->cdataBlock = NULL;
14533 options -= XML_PARSE_NOCDATA;
14534 ctxt->options |= XML_PARSE_NOCDATA;
14535 }
14536 if (options & XML_PARSE_NSCLEAN) {
14537 ctxt->options |= XML_PARSE_NSCLEAN;
14538 options -= XML_PARSE_NSCLEAN;
14539 }
14540 if (options & XML_PARSE_NONET) {
14541 ctxt->options |= XML_PARSE_NONET;
14542 options -= XML_PARSE_NONET;
14543 }
14544 if (options & XML_PARSE_COMPACT) {
14545 ctxt->options |= XML_PARSE_COMPACT;
14546 options -= XML_PARSE_COMPACT;
14547 }
14548 if (options & XML_PARSE_OLD10) {
14549 ctxt->options |= XML_PARSE_OLD10;
14550 options -= XML_PARSE_OLD10;
14551 }
14552 if (options & XML_PARSE_NOBASEFIX) {
14553 ctxt->options |= XML_PARSE_NOBASEFIX;
14554 options -= XML_PARSE_NOBASEFIX;
14555 }
14556 if (options & XML_PARSE_HUGE) {
14557 ctxt->options |= XML_PARSE_HUGE;
14558 options -= XML_PARSE_HUGE;
14559 }
14560 if (options & XML_PARSE_OLDSAX) {
14561 ctxt->options |= XML_PARSE_OLDSAX;
14562 options -= XML_PARSE_OLDSAX;
14563 }
14564 ctxt->linenumbers = 1;
14565 return (options);
14566 }
14567
14568 /**
14569 * xmlCtxtUseOptions:
14570 * @ctxt: an XML parser context
14571 * @options: a combination of xmlParserOption
14572 *
14573 * Applies the options to the parser context
14574 *
14575 * Returns 0 in case of success, the set of unknown or unimplemented options
14576 * in case of error.
14577 */
14578 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14579 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14580 {
14581 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14582 }
14583
14584 /**
14585 * xmlDoRead:
14586 * @ctxt: an XML parser context
14587 * @URL: the base URL to use for the document
14588 * @encoding: the document encoding, or NULL
14589 * @options: a combination of xmlParserOption
14590 * @reuse: keep the context for reuse
14591 *
14592 * Common front-end for the xmlRead functions
14593 *
14594 * Returns the resulting document tree or NULL
14595 */
14596 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14597 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14598 int options, int reuse)
14599 {
14600 xmlDocPtr ret;
14601
14602 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14603 if (encoding != NULL) {
14604 xmlCharEncodingHandlerPtr hdlr;
14605
14606 hdlr = xmlFindCharEncodingHandler(encoding);
14607 if (hdlr != NULL)
14608 xmlSwitchToEncoding(ctxt, hdlr);
14609 }
14610 if ((URL != NULL) && (ctxt->input != NULL) &&
14611 (ctxt->input->filename == NULL))
14612 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14613 xmlParseDocument(ctxt);
14614 if ((ctxt->wellFormed) || ctxt->recovery)
14615 ret = ctxt->myDoc;
14616 else {
14617 ret = NULL;
14618 if (ctxt->myDoc != NULL) {
14619 xmlFreeDoc(ctxt->myDoc);
14620 }
14621 }
14622 ctxt->myDoc = NULL;
14623 if (!reuse) {
14624 xmlFreeParserCtxt(ctxt);
14625 }
14626
14627 return (ret);
14628 }
14629
14630 /**
14631 * xmlReadDoc:
14632 * @cur: a pointer to a zero terminated string
14633 * @URL: the base URL to use for the document
14634 * @encoding: the document encoding, or NULL
14635 * @options: a combination of xmlParserOption
14636 *
14637 * parse an XML in-memory document and build a tree.
14638 *
14639 * Returns the resulting document tree
14640 */
14641 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14642 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643 {
14644 xmlParserCtxtPtr ctxt;
14645
14646 if (cur == NULL)
14647 return (NULL);
14648
14649 ctxt = xmlCreateDocParserCtxt(cur);
14650 if (ctxt == NULL)
14651 return (NULL);
14652 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14653 }
14654
14655 /**
14656 * xmlReadFile:
14657 * @filename: a file or URL
14658 * @encoding: the document encoding, or NULL
14659 * @options: a combination of xmlParserOption
14660 *
14661 * parse an XML file from the filesystem or the network.
14662 *
14663 * Returns the resulting document tree
14664 */
14665 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14666 xmlReadFile(const char *filename, const char *encoding, int options)
14667 {
14668 xmlParserCtxtPtr ctxt;
14669
14670 ctxt = xmlCreateURLParserCtxt(filename, options);
14671 if (ctxt == NULL)
14672 return (NULL);
14673 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14674 }
14675
14676 /**
14677 * xmlReadMemory:
14678 * @buffer: a pointer to a char array
14679 * @size: the size of the array
14680 * @URL: the base URL to use for the document
14681 * @encoding: the document encoding, or NULL
14682 * @options: a combination of xmlParserOption
14683 *
14684 * parse an XML in-memory document and build a tree.
14685 *
14686 * Returns the resulting document tree
14687 */
14688 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14689 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14690 {
14691 xmlParserCtxtPtr ctxt;
14692
14693 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14694 if (ctxt == NULL)
14695 return (NULL);
14696 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14697 }
14698
14699 /**
14700 * xmlReadFd:
14701 * @fd: an open file descriptor
14702 * @URL: the base URL to use for the document
14703 * @encoding: the document encoding, or NULL
14704 * @options: a combination of xmlParserOption
14705 *
14706 * parse an XML from a file descriptor and build a tree.
14707 * NOTE that the file descriptor will not be closed when the
14708 * reader is closed or reset.
14709 *
14710 * Returns the resulting document tree
14711 */
14712 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14713 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14714 {
14715 xmlParserCtxtPtr ctxt;
14716 xmlParserInputBufferPtr input;
14717 xmlParserInputPtr stream;
14718
14719 if (fd < 0)
14720 return (NULL);
14721
14722 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14723 if (input == NULL)
14724 return (NULL);
14725 input->closecallback = NULL;
14726 ctxt = xmlNewParserCtxt();
14727 if (ctxt == NULL) {
14728 xmlFreeParserInputBuffer(input);
14729 return (NULL);
14730 }
14731 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14732 if (stream == NULL) {
14733 xmlFreeParserInputBuffer(input);
14734 xmlFreeParserCtxt(ctxt);
14735 return (NULL);
14736 }
14737 inputPush(ctxt, stream);
14738 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14739 }
14740
14741 /**
14742 * xmlReadIO:
14743 * @ioread: an I/O read function
14744 * @ioclose: an I/O close function
14745 * @ioctx: an I/O handler
14746 * @URL: the base URL to use for the document
14747 * @encoding: the document encoding, or NULL
14748 * @options: a combination of xmlParserOption
14749 *
14750 * parse an XML document from I/O functions and source and build a tree.
14751 *
14752 * Returns the resulting document tree
14753 */
14754 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14755 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14756 void *ioctx, const char *URL, const char *encoding, int options)
14757 {
14758 xmlParserCtxtPtr ctxt;
14759 xmlParserInputBufferPtr input;
14760 xmlParserInputPtr stream;
14761
14762 if (ioread == NULL)
14763 return (NULL);
14764
14765 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14766 XML_CHAR_ENCODING_NONE);
14767 if (input == NULL)
14768 return (NULL);
14769 ctxt = xmlNewParserCtxt();
14770 if (ctxt == NULL) {
14771 xmlFreeParserInputBuffer(input);
14772 return (NULL);
14773 }
14774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14775 if (stream == NULL) {
14776 xmlFreeParserInputBuffer(input);
14777 xmlFreeParserCtxt(ctxt);
14778 return (NULL);
14779 }
14780 inputPush(ctxt, stream);
14781 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14782 }
14783
14784 /**
14785 * xmlCtxtReadDoc:
14786 * @ctxt: an XML parser context
14787 * @cur: a pointer to a zero terminated string
14788 * @URL: the base URL to use for the document
14789 * @encoding: the document encoding, or NULL
14790 * @options: a combination of xmlParserOption
14791 *
14792 * parse an XML in-memory document and build a tree.
14793 * This reuses the existing @ctxt parser context
14794 *
14795 * Returns the resulting document tree
14796 */
14797 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14798 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14799 const char *URL, const char *encoding, int options)
14800 {
14801 xmlParserInputPtr stream;
14802
14803 if (cur == NULL)
14804 return (NULL);
14805 if (ctxt == NULL)
14806 return (NULL);
14807
14808 xmlCtxtReset(ctxt);
14809
14810 stream = xmlNewStringInputStream(ctxt, cur);
14811 if (stream == NULL) {
14812 return (NULL);
14813 }
14814 inputPush(ctxt, stream);
14815 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14816 }
14817
14818 /**
14819 * xmlCtxtReadFile:
14820 * @ctxt: an XML parser context
14821 * @filename: a file or URL
14822 * @encoding: the document encoding, or NULL
14823 * @options: a combination of xmlParserOption
14824 *
14825 * parse an XML file from the filesystem or the network.
14826 * This reuses the existing @ctxt parser context
14827 *
14828 * Returns the resulting document tree
14829 */
14830 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14831 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14832 const char *encoding, int options)
14833 {
14834 xmlParserInputPtr stream;
14835
14836 if (filename == NULL)
14837 return (NULL);
14838 if (ctxt == NULL)
14839 return (NULL);
14840
14841 xmlCtxtReset(ctxt);
14842
14843 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14844 if (stream == NULL) {
14845 return (NULL);
14846 }
14847 inputPush(ctxt, stream);
14848 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14849 }
14850
14851 /**
14852 * xmlCtxtReadMemory:
14853 * @ctxt: an XML parser context
14854 * @buffer: a pointer to a char array
14855 * @size: the size of the array
14856 * @URL: the base URL to use for the document
14857 * @encoding: the document encoding, or NULL
14858 * @options: a combination of xmlParserOption
14859 *
14860 * parse an XML in-memory document and build a tree.
14861 * This reuses the existing @ctxt parser context
14862 *
14863 * Returns the resulting document tree
14864 */
14865 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14866 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14867 const char *URL, const char *encoding, int options)
14868 {
14869 xmlParserInputBufferPtr input;
14870 xmlParserInputPtr stream;
14871
14872 if (ctxt == NULL)
14873 return (NULL);
14874 if (buffer == NULL)
14875 return (NULL);
14876
14877 xmlCtxtReset(ctxt);
14878
14879 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14880 if (input == NULL) {
14881 return(NULL);
14882 }
14883
14884 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14885 if (stream == NULL) {
14886 xmlFreeParserInputBuffer(input);
14887 return(NULL);
14888 }
14889
14890 inputPush(ctxt, stream);
14891 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14892 }
14893
14894 /**
14895 * xmlCtxtReadFd:
14896 * @ctxt: an XML parser context
14897 * @fd: an open file descriptor
14898 * @URL: the base URL to use for the document
14899 * @encoding: the document encoding, or NULL
14900 * @options: a combination of xmlParserOption
14901 *
14902 * parse an XML from a file descriptor and build a tree.
14903 * This reuses the existing @ctxt parser context
14904 * NOTE that the file descriptor will not be closed when the
14905 * reader is closed or reset.
14906 *
14907 * Returns the resulting document tree
14908 */
14909 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14910 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14911 const char *URL, const char *encoding, int options)
14912 {
14913 xmlParserInputBufferPtr input;
14914 xmlParserInputPtr stream;
14915
14916 if (fd < 0)
14917 return (NULL);
14918 if (ctxt == NULL)
14919 return (NULL);
14920
14921 xmlCtxtReset(ctxt);
14922
14923
14924 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14925 if (input == NULL)
14926 return (NULL);
14927 input->closecallback = NULL;
14928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929 if (stream == NULL) {
14930 xmlFreeParserInputBuffer(input);
14931 return (NULL);
14932 }
14933 inputPush(ctxt, stream);
14934 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14935 }
14936
14937 /**
14938 * xmlCtxtReadIO:
14939 * @ctxt: an XML parser context
14940 * @ioread: an I/O read function
14941 * @ioclose: an I/O close function
14942 * @ioctx: an I/O handler
14943 * @URL: the base URL to use for the document
14944 * @encoding: the document encoding, or NULL
14945 * @options: a combination of xmlParserOption
14946 *
14947 * parse an XML document from I/O functions and source and build a tree.
14948 * This reuses the existing @ctxt parser context
14949 *
14950 * Returns the resulting document tree
14951 */
14952 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14953 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14954 xmlInputCloseCallback ioclose, void *ioctx,
14955 const char *URL,
14956 const char *encoding, int options)
14957 {
14958 xmlParserInputBufferPtr input;
14959 xmlParserInputPtr stream;
14960
14961 if (ioread == NULL)
14962 return (NULL);
14963 if (ctxt == NULL)
14964 return (NULL);
14965
14966 xmlCtxtReset(ctxt);
14967
14968 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14969 XML_CHAR_ENCODING_NONE);
14970 if (input == NULL)
14971 return (NULL);
14972 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14973 if (stream == NULL) {
14974 xmlFreeParserInputBuffer(input);
14975 return (NULL);
14976 }
14977 inputPush(ctxt, stream);
14978 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14979 }
14980
14981 #define bottom_parser
14982 #include "elfgcchack.h"
14983