1 /* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33 #define IN_LIBXML 34 #include "libxml.h" 35 36 #if defined(WIN32) && !defined (__CYGWIN__) 37 #define XML_DIR_SEP '\\' 38 #else 39 #define XML_DIR_SEP '/' 40 #endif 41 42 #include <stdlib.h> 43 #include <limits.h> 44 #include <string.h> 45 #include <stdarg.h> 46 #include <libxml/xmlmemory.h> 47 #include <libxml/threads.h> 48 #include <libxml/globals.h> 49 #include <libxml/tree.h> 50 #include <libxml/parser.h> 51 #include <libxml/parserInternals.h> 52 #include <libxml/valid.h> 53 #include <libxml/entities.h> 54 #include <libxml/xmlerror.h> 55 #include <libxml/encoding.h> 56 #include <libxml/xmlIO.h> 57 #include <libxml/uri.h> 58 #ifdef LIBXML_CATALOG_ENABLED 59 #include <libxml/catalog.h> 60 #endif 61 #ifdef LIBXML_SCHEMAS_ENABLED 62 #include <libxml/xmlschemastypes.h> 63 #include <libxml/relaxng.h> 64 #endif 65 #ifdef HAVE_CTYPE_H 66 #include <ctype.h> 67 #endif 68 #ifdef HAVE_STDLIB_H 69 #include <stdlib.h> 70 #endif 71 #ifdef HAVE_SYS_STAT_H 72 #include <sys/stat.h> 73 #endif 74 #ifdef HAVE_FCNTL_H 75 #include <fcntl.h> 76 #endif 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 #ifdef HAVE_ZLIB_H 81 #include <zlib.h> 82 #endif 83 #ifdef HAVE_LZMA_H 84 #include <lzma.h> 85 #endif 86 87 #include "buf.h" 88 #include "enc.h" 89 90 static void 91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 92 93 static xmlParserCtxtPtr 94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 95 const xmlChar *base, xmlParserCtxtPtr pctx); 96 97 static void xmlHaltParser(xmlParserCtxtPtr ctxt); 98 99 /************************************************************************ 100 * * 101 * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * 102 * * 103 ************************************************************************/ 104 105 #define XML_PARSER_BIG_ENTITY 1000 106 #define XML_PARSER_LOT_ENTITY 5000 107 108 /* 109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 110 * replacement over the size in byte of the input indicates that you have 111 * and eponential behaviour. A value of 10 correspond to at least 3 entity 112 * replacement per byte of input. 113 */ 114 #define XML_PARSER_NON_LINEAR 10 115 116 /* 117 * xmlParserEntityCheck 118 * 119 * Function to check non-linear entity expansion behaviour 120 * This is here to detect and stop exponential linear entity expansion 121 * This is not a limitation of the parser but a safety 122 * boundary feature. It can be disabled with the XML_PARSE_HUGE 123 * parser option. 124 */ 125 static int 126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, 127 xmlEntityPtr ent, size_t replacement) 128 { 129 size_t consumed = 0; 130 131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 132 return (0); 133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 134 return (1); 135 136 /* 137 * This may look absurd but is needed to detect 138 * entities problems 139 */ 140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 141 (ent->content != NULL) && (ent->checked == 0) && 142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { 143 unsigned long oldnbent = ctxt->nbentities; 144 xmlChar *rep; 145 146 ent->checked = 1; 147 148 ++ctxt->depth; 149 rep = xmlStringDecodeEntities(ctxt, ent->content, 150 XML_SUBSTITUTE_REF, 0, 0, 0); 151 --ctxt->depth; 152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) { 153 ent->content[0] = 0; 154 } 155 156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 157 if (rep != NULL) { 158 if (xmlStrchr(rep, '<')) 159 ent->checked |= 1; 160 xmlFree(rep); 161 rep = NULL; 162 } 163 } 164 if (replacement != 0) { 165 if (replacement < XML_MAX_TEXT_LENGTH) 166 return(0); 167 168 /* 169 * If 
the volume of entity copy reaches 10 times the 170 * amount of parsed data and over the large text threshold 171 * then that's very likely to be an abuse. 172 */ 173 if (ctxt->input != NULL) { 174 consumed = ctxt->input->consumed + 175 (ctxt->input->cur - ctxt->input->base); 176 } 177 consumed += ctxt->sizeentities; 178 179 if (replacement < XML_PARSER_NON_LINEAR * consumed) 180 return(0); 181 } else if (size != 0) { 182 /* 183 * Do the check based on the replacement size of the entity 184 */ 185 if (size < XML_PARSER_BIG_ENTITY) 186 return(0); 187 188 /* 189 * A limit on the amount of text data reasonably used 190 */ 191 if (ctxt->input != NULL) { 192 consumed = ctxt->input->consumed + 193 (ctxt->input->cur - ctxt->input->base); 194 } 195 consumed += ctxt->sizeentities; 196 197 if ((size < XML_PARSER_NON_LINEAR * consumed) && 198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 199 return (0); 200 } else if (ent != NULL) { 201 /* 202 * use the number of parsed entities in the replacement 203 */ 204 size = ent->checked / 2; 205 206 /* 207 * The amount of data parsed counting entities size only once 208 */ 209 if (ctxt->input != NULL) { 210 consumed = ctxt->input->consumed + 211 (ctxt->input->cur - ctxt->input->base); 212 } 213 consumed += ctxt->sizeentities; 214 215 /* 216 * Check the density of entities for the amount of data 217 * knowing an entity reference will take at least 3 bytes 218 */ 219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 220 return (0); 221 } else { 222 /* 223 * strange we got no data for checking 224 */ 225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && 226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || 227 (ctxt->nbentities <= 10000)) 228 return (0); 229 } 230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 231 return (1); 232 } 233 234 /** 235 * xmlParserMaxDepth: 236 * 237 * arbitrary depth limit for the XML documents that we allow to 238 * process. 
This is not a limitation of the parser but a safety 239 * boundary feature. It can be disabled with the XML_PARSE_HUGE 240 * parser option. 241 */ 242 unsigned int xmlParserMaxDepth = 256; 243 244 245 246 #define SAX2 1 247 #define XML_PARSER_BIG_BUFFER_SIZE 300 248 #define XML_PARSER_BUFFER_SIZE 100 249 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 250 251 /** 252 * XML_PARSER_CHUNK_SIZE 253 * 254 * When calling GROW that's the minimal amount of data 255 * the parser expected to have received. It is not a hard 256 * limit but an optimization when reading strings like Names 257 * It is not strictly needed as long as inputs available characters 258 * are followed by 0, which should be provided by the I/O level 259 */ 260 #define XML_PARSER_CHUNK_SIZE 100 261 262 /* 263 * List of XML prefixed PI allowed by W3C specs 264 */ 265 266 static const char *xmlW3CPIs[] = { 267 "xml-stylesheet", 268 "xml-model", 269 NULL 270 }; 271 272 273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 275 const xmlChar **str); 276 277 static xmlParserErrors 278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 279 xmlSAXHandlerPtr sax, 280 void *user_data, int depth, const xmlChar *URL, 281 const xmlChar *ID, xmlNodePtr *list); 282 283 static int 284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 285 const char *encoding); 286 #ifdef LIBXML_LEGACY_ENABLED 287 static void 288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 289 xmlNodePtr lastNode); 290 #endif /* LIBXML_LEGACY_ENABLED */ 291 292 static xmlParserErrors 293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 294 const xmlChar *string, void *user_data, xmlNodePtr *lst); 295 296 static int 297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 298 299 /************************************************************************ 300 * * 301 * Some 
factorized error routines * 302 * * 303 ************************************************************************/ 304 305 /** 306 * xmlErrAttributeDup: 307 * @ctxt: an XML parser context 308 * @prefix: the attribute prefix 309 * @localname: the attribute localname 310 * 311 * Handle a redefinition of attribute error 312 */ 313 static void 314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 315 const xmlChar * localname) 316 { 317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 318 (ctxt->instate == XML_PARSER_EOF)) 319 return; 320 if (ctxt != NULL) 321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 322 323 if (prefix == NULL) 324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 326 (const char *) localname, NULL, NULL, 0, 0, 327 "Attribute %s redefined\n", localname); 328 else 329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 331 (const char *) prefix, (const char *) localname, 332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 333 localname); 334 if (ctxt != NULL) { 335 ctxt->wellFormed = 0; 336 if (ctxt->recovery == 0) 337 ctxt->disableSAX = 1; 338 } 339 } 340 341 /** 342 * xmlFatalErr: 343 * @ctxt: an XML parser context 344 * @error: the error number 345 * @extra: extra information string 346 * 347 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 348 */ 349 static void 350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 351 { 352 const char *errmsg; 353 354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 355 (ctxt->instate == XML_PARSER_EOF)) 356 return; 357 switch (error) { 358 case XML_ERR_INVALID_HEX_CHARREF: 359 errmsg = "CharRef: invalid hexadecimal value"; 360 break; 361 case XML_ERR_INVALID_DEC_CHARREF: 362 errmsg = "CharRef: invalid decimal value"; 363 break; 364 case XML_ERR_INVALID_CHARREF: 365 errmsg = "CharRef: invalid value"; 366 break; 367 case XML_ERR_INTERNAL_ERROR: 368 errmsg = "internal error"; 369 break; 370 case XML_ERR_PEREF_AT_EOF: 371 errmsg = "PEReference at end of document"; 372 break; 373 case XML_ERR_PEREF_IN_PROLOG: 374 errmsg = "PEReference in prolog"; 375 break; 376 case XML_ERR_PEREF_IN_EPILOG: 377 errmsg = "PEReference in epilog"; 378 break; 379 case XML_ERR_PEREF_NO_NAME: 380 errmsg = "PEReference: no name"; 381 break; 382 case XML_ERR_PEREF_SEMICOL_MISSING: 383 errmsg = "PEReference: expecting ';'"; 384 break; 385 case XML_ERR_ENTITY_LOOP: 386 errmsg = "Detected an entity reference loop"; 387 break; 388 case XML_ERR_ENTITY_NOT_STARTED: 389 errmsg = "EntityValue: \" or ' expected"; 390 break; 391 case XML_ERR_ENTITY_PE_INTERNAL: 392 errmsg = "PEReferences forbidden in internal subset"; 393 break; 394 case XML_ERR_ENTITY_NOT_FINISHED: 395 errmsg = "EntityValue: \" or ' expected"; 396 break; 397 case XML_ERR_ATTRIBUTE_NOT_STARTED: 398 errmsg = "AttValue: \" or ' expected"; 399 break; 400 case XML_ERR_LT_IN_ATTRIBUTE: 401 errmsg = "Unescaped '<' not allowed in attributes values"; 402 break; 403 case XML_ERR_LITERAL_NOT_STARTED: 404 errmsg = "SystemLiteral \" or ' expected"; 405 break; 406 case XML_ERR_LITERAL_NOT_FINISHED: 407 errmsg = "Unfinished System or Public ID \" or ' expected"; 408 break; 409 case XML_ERR_MISPLACED_CDATA_END: 410 errmsg = "Sequence ']]>' not allowed in content"; 411 break; 412 case 
XML_ERR_URI_REQUIRED: 413 errmsg = "SYSTEM or PUBLIC, the URI is missing"; 414 break; 415 case XML_ERR_PUBID_REQUIRED: 416 errmsg = "PUBLIC, the Public Identifier is missing"; 417 break; 418 case XML_ERR_HYPHEN_IN_COMMENT: 419 errmsg = "Comment must not contain '--' (double-hyphen)"; 420 break; 421 case XML_ERR_PI_NOT_STARTED: 422 errmsg = "xmlParsePI : no target name"; 423 break; 424 case XML_ERR_RESERVED_XML_NAME: 425 errmsg = "Invalid PI name"; 426 break; 427 case XML_ERR_NOTATION_NOT_STARTED: 428 errmsg = "NOTATION: Name expected here"; 429 break; 430 case XML_ERR_NOTATION_NOT_FINISHED: 431 errmsg = "'>' required to close NOTATION declaration"; 432 break; 433 case XML_ERR_VALUE_REQUIRED: 434 errmsg = "Entity value required"; 435 break; 436 case XML_ERR_URI_FRAGMENT: 437 errmsg = "Fragment not allowed"; 438 break; 439 case XML_ERR_ATTLIST_NOT_STARTED: 440 errmsg = "'(' required to start ATTLIST enumeration"; 441 break; 442 case XML_ERR_NMTOKEN_REQUIRED: 443 errmsg = "NmToken expected in ATTLIST enumeration"; 444 break; 445 case XML_ERR_ATTLIST_NOT_FINISHED: 446 errmsg = "')' required to finish ATTLIST enumeration"; 447 break; 448 case XML_ERR_MIXED_NOT_STARTED: 449 errmsg = "MixedContentDecl : '|' or ')*' expected"; 450 break; 451 case XML_ERR_PCDATA_REQUIRED: 452 errmsg = "MixedContentDecl : '#PCDATA' expected"; 453 break; 454 case XML_ERR_ELEMCONTENT_NOT_STARTED: 455 errmsg = "ContentDecl : Name or '(' expected"; 456 break; 457 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 458 errmsg = "ContentDecl : ',' '|' or ')' expected"; 459 break; 460 case XML_ERR_PEREF_IN_INT_SUBSET: 461 errmsg = 462 "PEReference: forbidden within markup decl in internal subset"; 463 break; 464 case XML_ERR_GT_REQUIRED: 465 errmsg = "expected '>'"; 466 break; 467 case XML_ERR_CONDSEC_INVALID: 468 errmsg = "XML conditional section '[' expected"; 469 break; 470 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 471 errmsg = "Content error in the external subset"; 472 break; 473 case 
XML_ERR_CONDSEC_INVALID_KEYWORD: 474 errmsg = 475 "conditional section INCLUDE or IGNORE keyword expected"; 476 break; 477 case XML_ERR_CONDSEC_NOT_FINISHED: 478 errmsg = "XML conditional section not closed"; 479 break; 480 case XML_ERR_XMLDECL_NOT_STARTED: 481 errmsg = "Text declaration '<?xml' required"; 482 break; 483 case XML_ERR_XMLDECL_NOT_FINISHED: 484 errmsg = "parsing XML declaration: '?>' expected"; 485 break; 486 case XML_ERR_EXT_ENTITY_STANDALONE: 487 errmsg = "external parsed entities cannot be standalone"; 488 break; 489 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 490 errmsg = "EntityRef: expecting ';'"; 491 break; 492 case XML_ERR_DOCTYPE_NOT_FINISHED: 493 errmsg = "DOCTYPE improperly terminated"; 494 break; 495 case XML_ERR_LTSLASH_REQUIRED: 496 errmsg = "EndTag: '</' not found"; 497 break; 498 case XML_ERR_EQUAL_REQUIRED: 499 errmsg = "expected '='"; 500 break; 501 case XML_ERR_STRING_NOT_CLOSED: 502 errmsg = "String not closed expecting \" or '"; 503 break; 504 case XML_ERR_STRING_NOT_STARTED: 505 errmsg = "String not started expecting ' or \""; 506 break; 507 case XML_ERR_ENCODING_NAME: 508 errmsg = "Invalid XML encoding name"; 509 break; 510 case XML_ERR_STANDALONE_VALUE: 511 errmsg = "standalone accepts only 'yes' or 'no'"; 512 break; 513 case XML_ERR_DOCUMENT_EMPTY: 514 errmsg = "Document is empty"; 515 break; 516 case XML_ERR_DOCUMENT_END: 517 errmsg = "Extra content at the end of the document"; 518 break; 519 case XML_ERR_NOT_WELL_BALANCED: 520 errmsg = "chunk is not well balanced"; 521 break; 522 case XML_ERR_EXTRA_CONTENT: 523 errmsg = "extra content at the end of well balanced chunk"; 524 break; 525 case XML_ERR_VERSION_MISSING: 526 errmsg = "Malformed declaration expecting version"; 527 break; 528 case XML_ERR_NAME_TOO_LONG: 529 errmsg = "Name too long use XML_PARSE_HUGE option"; 530 break; 531 #if 0 532 case: 533 errmsg = ""; 534 break; 535 #endif 536 default: 537 errmsg = "Unregistered error message"; 538 } 539 if (ctxt != NULL) 540 
ctxt->errNo = error; 541 if (info == NULL) { 542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", 544 errmsg); 545 } else { 546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", 548 errmsg, info); 549 } 550 if (ctxt != NULL) { 551 ctxt->wellFormed = 0; 552 if (ctxt->recovery == 0) 553 ctxt->disableSAX = 1; 554 } 555 } 556 557 /** 558 * xmlFatalErrMsg: 559 * @ctxt: an XML parser context 560 * @error: the error number 561 * @msg: the error message 562 * 563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 564 */ 565 static void LIBXML_ATTR_FORMAT(3,0) 566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 567 const char *msg) 568 { 569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 570 (ctxt->instate == XML_PARSER_EOF)) 571 return; 572 if (ctxt != NULL) 573 ctxt->errNo = error; 574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 576 if (ctxt != NULL) { 577 ctxt->wellFormed = 0; 578 if (ctxt->recovery == 0) 579 ctxt->disableSAX = 1; 580 } 581 } 582 583 /** 584 * xmlWarningMsg: 585 * @ctxt: an XML parser context 586 * @error: the error number 587 * @msg: the error message 588 * @str1: extra data 589 * @str2: extra data 590 * 591 * Handle a warning. 592 */ 593 static void LIBXML_ATTR_FORMAT(3,0) 594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg, const xmlChar *str1, const xmlChar *str2) 596 { 597 xmlStructuredErrorFunc schannel = NULL; 598 599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 600 (ctxt->instate == XML_PARSER_EOF)) 601 return; 602 if ((ctxt != NULL) && (ctxt->sax != NULL) && 603 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 604 schannel = ctxt->sax->serror; 605 if (ctxt != NULL) { 606 __xmlRaiseError(schannel, 607 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 608 ctxt->userData, 609 ctxt, NULL, XML_FROM_PARSER, error, 610 XML_ERR_WARNING, NULL, 0, 611 (const char *) str1, (const char *) str2, NULL, 0, 0, 612 msg, (const char *) str1, (const char *) str2); 613 } else { 614 __xmlRaiseError(schannel, NULL, NULL, 615 ctxt, NULL, XML_FROM_PARSER, error, 616 XML_ERR_WARNING, NULL, 0, 617 (const char *) str1, (const char *) str2, NULL, 0, 0, 618 msg, (const char *) str1, (const char *) str2); 619 } 620 } 621 622 /** 623 * xmlValidityError: 624 * @ctxt: an XML parser context 625 * @error: the error number 626 * @msg: the error message 627 * @str1: extra data 628 * 629 * Handle a validity error. 630 */ 631 static void LIBXML_ATTR_FORMAT(3,0) 632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 633 const char *msg, const xmlChar *str1, const xmlChar *str2) 634 { 635 xmlStructuredErrorFunc schannel = NULL; 636 637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 638 (ctxt->instate == XML_PARSER_EOF)) 639 return; 640 if (ctxt != NULL) { 641 ctxt->errNo = error; 642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 643 schannel = ctxt->sax->serror; 644 } 645 if (ctxt != NULL) { 646 __xmlRaiseError(schannel, 647 ctxt->vctxt.error, ctxt->vctxt.userData, 648 ctxt, NULL, XML_FROM_DTD, error, 649 XML_ERR_ERROR, NULL, 0, (const char *) str1, 650 (const char *) str2, NULL, 0, 0, 651 msg, (const char *) str1, (const char *) str2); 652 ctxt->valid = 0; 653 } else { 654 __xmlRaiseError(schannel, NULL, NULL, 655 ctxt, NULL, XML_FROM_DTD, error, 656 XML_ERR_ERROR, NULL, 0, (const char *) str1, 657 (const char *) str2, NULL, 0, 0, 658 msg, (const char *) str1, (const char *) str2); 659 } 660 } 661 662 /** 663 * xmlFatalErrMsgInt: 664 * @ctxt: an XML parser context 665 * @error: the error number 666 * @msg: the error message 667 * @val: an integer value 668 * 669 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 670 */ 671 static void LIBXML_ATTR_FORMAT(3,0) 672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 673 const char *msg, int val) 674 { 675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 676 (ctxt->instate == XML_PARSER_EOF)) 677 return; 678 if (ctxt != NULL) 679 ctxt->errNo = error; 680 __xmlRaiseError(NULL, NULL, NULL, 681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 683 if (ctxt != NULL) { 684 ctxt->wellFormed = 0; 685 if (ctxt->recovery == 0) 686 ctxt->disableSAX = 1; 687 } 688 } 689 690 /** 691 * xmlFatalErrMsgStrIntStr: 692 * @ctxt: an XML parser context 693 * @error: the error number 694 * @msg: the error message 695 * @str1: an string info 696 * @val: an integer value 697 * @str2: an string info 698 * 699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 700 */ 701 static void LIBXML_ATTR_FORMAT(3,0) 702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 703 const char *msg, const xmlChar *str1, int val, 704 const xmlChar *str2) 705 { 706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 707 (ctxt->instate == XML_PARSER_EOF)) 708 return; 709 if (ctxt != NULL) 710 ctxt->errNo = error; 711 __xmlRaiseError(NULL, NULL, NULL, 712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 713 NULL, 0, (const char *) str1, (const char *) str2, 714 NULL, val, 0, msg, str1, val, str2); 715 if (ctxt != NULL) { 716 ctxt->wellFormed = 0; 717 if (ctxt->recovery == 0) 718 ctxt->disableSAX = 1; 719 } 720 } 721 722 /** 723 * xmlFatalErrMsgStr: 724 * @ctxt: an XML parser context 725 * @error: the error number 726 * @msg: the error message 727 * @val: a string value 728 * 729 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 730 */ 731 static void LIBXML_ATTR_FORMAT(3,0) 732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 733 const char *msg, const xmlChar * val) 734 { 735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 736 (ctxt->instate == XML_PARSER_EOF)) 737 return; 738 if (ctxt != NULL) 739 ctxt->errNo = error; 740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 741 XML_FROM_PARSER, error, XML_ERR_FATAL, 742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 743 val); 744 if (ctxt != NULL) { 745 ctxt->wellFormed = 0; 746 if (ctxt->recovery == 0) 747 ctxt->disableSAX = 1; 748 } 749 } 750 751 /** 752 * xmlErrMsgStr: 753 * @ctxt: an XML parser context 754 * @error: the error number 755 * @msg: the error message 756 * @val: a string value 757 * 758 * Handle a non fatal parser error 759 */ 760 static void LIBXML_ATTR_FORMAT(3,0) 761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 762 const char *msg, const xmlChar * val) 763 { 764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 765 (ctxt->instate == XML_PARSER_EOF)) 766 return; 767 if (ctxt != NULL) 768 ctxt->errNo = error; 769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 770 XML_FROM_PARSER, error, XML_ERR_ERROR, 771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 772 val); 773 } 774 775 /** 776 * xmlNsErr: 777 * @ctxt: an XML parser context 778 * @error: the error number 779 * @msg: the message 780 * @info1: extra information string 781 * @info2: extra information string 782 * 783 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 784 */ 785 static void LIBXML_ATTR_FORMAT(3,0) 786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 787 const char *msg, 788 const xmlChar * info1, const xmlChar * info2, 789 const xmlChar * info3) 790 { 791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 792 (ctxt->instate == XML_PARSER_EOF)) 793 return; 794 if (ctxt != NULL) 795 ctxt->errNo = error; 796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 797 XML_ERR_ERROR, NULL, 0, (const char *) info1, 798 (const char *) info2, (const char *) info3, 0, 0, msg, 799 info1, info2, info3); 800 if (ctxt != NULL) 801 ctxt->nsWellFormed = 0; 802 } 803 804 /** 805 * xmlNsWarn 806 * @ctxt: an XML parser context 807 * @error: the error number 808 * @msg: the message 809 * @info1: extra information string 810 * @info2: extra information string 811 * 812 * Handle a namespace warning error 813 */ 814 static void LIBXML_ATTR_FORMAT(3,0) 815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 816 const char *msg, 817 const xmlChar * info1, const xmlChar * info2, 818 const xmlChar * info3) 819 { 820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 821 (ctxt->instate == XML_PARSER_EOF)) 822 return; 823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 824 XML_ERR_WARNING, NULL, 0, (const char *) info1, 825 (const char *) info2, (const char *) info3, 0, 0, msg, 826 info1, info2, info3); 827 } 828 829 /************************************************************************ 830 * * 831 * Library wide options * 832 * * 833 ************************************************************************/ 834 835 /** 836 * xmlHasFeature: 837 * @feature: the feature to be examined 838 * 839 * Examines if the library has been compiled with a given feature. 840 * 841 * Returns a non-zero value if the feature exist, otherwise zero. 842 * Returns zero (0) if the feature does not exist or an unknown 843 * unknown feature is requested, non-zero otherwise. 
844 */ 845 int 846 xmlHasFeature(xmlFeature feature) 847 { 848 switch (feature) { 849 case XML_WITH_THREAD: 850 #ifdef LIBXML_THREAD_ENABLED 851 return(1); 852 #else 853 return(0); 854 #endif 855 case XML_WITH_TREE: 856 #ifdef LIBXML_TREE_ENABLED 857 return(1); 858 #else 859 return(0); 860 #endif 861 case XML_WITH_OUTPUT: 862 #ifdef LIBXML_OUTPUT_ENABLED 863 return(1); 864 #else 865 return(0); 866 #endif 867 case XML_WITH_PUSH: 868 #ifdef LIBXML_PUSH_ENABLED 869 return(1); 870 #else 871 return(0); 872 #endif 873 case XML_WITH_READER: 874 #ifdef LIBXML_READER_ENABLED 875 return(1); 876 #else 877 return(0); 878 #endif 879 case XML_WITH_PATTERN: 880 #ifdef LIBXML_PATTERN_ENABLED 881 return(1); 882 #else 883 return(0); 884 #endif 885 case XML_WITH_WRITER: 886 #ifdef LIBXML_WRITER_ENABLED 887 return(1); 888 #else 889 return(0); 890 #endif 891 case XML_WITH_SAX1: 892 #ifdef LIBXML_SAX1_ENABLED 893 return(1); 894 #else 895 return(0); 896 #endif 897 case XML_WITH_FTP: 898 #ifdef LIBXML_FTP_ENABLED 899 return(1); 900 #else 901 return(0); 902 #endif 903 case XML_WITH_HTTP: 904 #ifdef LIBXML_HTTP_ENABLED 905 return(1); 906 #else 907 return(0); 908 #endif 909 case XML_WITH_VALID: 910 #ifdef LIBXML_VALID_ENABLED 911 return(1); 912 #else 913 return(0); 914 #endif 915 case XML_WITH_HTML: 916 #ifdef LIBXML_HTML_ENABLED 917 return(1); 918 #else 919 return(0); 920 #endif 921 case XML_WITH_LEGACY: 922 #ifdef LIBXML_LEGACY_ENABLED 923 return(1); 924 #else 925 return(0); 926 #endif 927 case XML_WITH_C14N: 928 #ifdef LIBXML_C14N_ENABLED 929 return(1); 930 #else 931 return(0); 932 #endif 933 case XML_WITH_CATALOG: 934 #ifdef LIBXML_CATALOG_ENABLED 935 return(1); 936 #else 937 return(0); 938 #endif 939 case XML_WITH_XPATH: 940 #ifdef LIBXML_XPATH_ENABLED 941 return(1); 942 #else 943 return(0); 944 #endif 945 case XML_WITH_XPTR: 946 #ifdef LIBXML_XPTR_ENABLED 947 return(1); 948 #else 949 return(0); 950 #endif 951 case XML_WITH_XINCLUDE: 952 #ifdef LIBXML_XINCLUDE_ENABLED 953 return(1); 954 
#else 955 return(0); 956 #endif 957 case XML_WITH_ICONV: 958 #ifdef LIBXML_ICONV_ENABLED 959 return(1); 960 #else 961 return(0); 962 #endif 963 case XML_WITH_ISO8859X: 964 #ifdef LIBXML_ISO8859X_ENABLED 965 return(1); 966 #else 967 return(0); 968 #endif 969 case XML_WITH_UNICODE: 970 #ifdef LIBXML_UNICODE_ENABLED 971 return(1); 972 #else 973 return(0); 974 #endif 975 case XML_WITH_REGEXP: 976 #ifdef LIBXML_REGEXP_ENABLED 977 return(1); 978 #else 979 return(0); 980 #endif 981 case XML_WITH_AUTOMATA: 982 #ifdef LIBXML_AUTOMATA_ENABLED 983 return(1); 984 #else 985 return(0); 986 #endif 987 case XML_WITH_EXPR: 988 #ifdef LIBXML_EXPR_ENABLED 989 return(1); 990 #else 991 return(0); 992 #endif 993 case XML_WITH_SCHEMAS: 994 #ifdef LIBXML_SCHEMAS_ENABLED 995 return(1); 996 #else 997 return(0); 998 #endif 999 case XML_WITH_SCHEMATRON: 1000 #ifdef LIBXML_SCHEMATRON_ENABLED 1001 return(1); 1002 #else 1003 return(0); 1004 #endif 1005 case XML_WITH_MODULES: 1006 #ifdef LIBXML_MODULES_ENABLED 1007 return(1); 1008 #else 1009 return(0); 1010 #endif 1011 case XML_WITH_DEBUG: 1012 #ifdef LIBXML_DEBUG_ENABLED 1013 return(1); 1014 #else 1015 return(0); 1016 #endif 1017 case XML_WITH_DEBUG_MEM: 1018 #ifdef DEBUG_MEMORY_LOCATION 1019 return(1); 1020 #else 1021 return(0); 1022 #endif 1023 case XML_WITH_DEBUG_RUN: 1024 #ifdef LIBXML_DEBUG_RUNTIME 1025 return(1); 1026 #else 1027 return(0); 1028 #endif 1029 case XML_WITH_ZLIB: 1030 #ifdef LIBXML_ZLIB_ENABLED 1031 return(1); 1032 #else 1033 return(0); 1034 #endif 1035 case XML_WITH_LZMA: 1036 #ifdef LIBXML_LZMA_ENABLED 1037 return(1); 1038 #else 1039 return(0); 1040 #endif 1041 case XML_WITH_ICU: 1042 #ifdef LIBXML_ICU_ENABLED 1043 return(1); 1044 #else 1045 return(0); 1046 #endif 1047 default: 1048 break; 1049 } 1050 return(0); 1051 } 1052 1053 /************************************************************************ 1054 * * 1055 * SAX2 defaulted attributes handling * 1056 * * 1057 
************************************************************************/ 1058 1059 /** 1060 * xmlDetectSAX2: 1061 * @ctxt: an XML parser context 1062 * 1063 * Do the SAX2 detection and specific intialization 1064 */ 1065 static void 1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 1067 if (ctxt == NULL) return; 1068 #ifdef LIBXML_SAX1_ENABLED 1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 1070 ((ctxt->sax->startElementNs != NULL) || 1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 1072 #else 1073 ctxt->sax2 = 1; 1074 #endif /* LIBXML_SAX1_ENABLED */ 1075 1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 1080 (ctxt->str_xml_ns == NULL)) { 1081 xmlErrMemory(ctxt, NULL); 1082 } 1083 } 1084 1085 typedef struct _xmlDefAttrs xmlDefAttrs; 1086 typedef xmlDefAttrs *xmlDefAttrsPtr; 1087 struct _xmlDefAttrs { 1088 int nbAttrs; /* number of defaulted attributes on that element */ 1089 int maxAttrs; /* the size of the array */ 1090 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1091 }; 1092 1093 /** 1094 * xmlAttrNormalizeSpace: 1095 * @src: the source string 1096 * @dst: the target string 1097 * 1098 * Normalize the space in non CDATA attribute values: 1099 * If the attribute type is not CDATA, then the XML processor MUST further 1100 * process the normalized attribute value by discarding any leading and 1101 * trailing space (#x20) characters, and by replacing sequences of space 1102 * (#x20) characters by a single space (#x20) character. 1103 * Note that the size of dst need to be at least src, and if one doesn't need 1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1105 * passing src as dst is just fine. 
1106 * 1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1108 * is needed. 1109 */ 1110 static xmlChar * 1111 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1112 { 1113 if ((src == NULL) || (dst == NULL)) 1114 return(NULL); 1115 1116 while (*src == 0x20) src++; 1117 while (*src != 0) { 1118 if (*src == 0x20) { 1119 while (*src == 0x20) src++; 1120 if (*src != 0) 1121 *dst++ = 0x20; 1122 } else { 1123 *dst++ = *src++; 1124 } 1125 } 1126 *dst = 0; 1127 if (dst == src) 1128 return(NULL); 1129 return(dst); 1130 } 1131 1132 /** 1133 * xmlAttrNormalizeSpace2: 1134 * @src: the source string 1135 * 1136 * Normalize the space in non CDATA attribute values, a slightly more complex 1137 * front end to avoid allocation problems when running on attribute values 1138 * coming from the input. 1139 * 1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1141 * is needed. 1142 */ 1143 static const xmlChar * 1144 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1145 { 1146 int i; 1147 int remove_head = 0; 1148 int need_realloc = 0; 1149 const xmlChar *cur; 1150 1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1152 return(NULL); 1153 i = *len; 1154 if (i <= 0) 1155 return(NULL); 1156 1157 cur = src; 1158 while (*cur == 0x20) { 1159 cur++; 1160 remove_head++; 1161 } 1162 while (*cur != 0) { 1163 if (*cur == 0x20) { 1164 cur++; 1165 if ((*cur == 0x20) || (*cur == 0)) { 1166 need_realloc = 1; 1167 break; 1168 } 1169 } else 1170 cur++; 1171 } 1172 if (need_realloc) { 1173 xmlChar *ret; 1174 1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1176 if (ret == NULL) { 1177 xmlErrMemory(ctxt, NULL); 1178 return(NULL); 1179 } 1180 xmlAttrNormalizeSpace(ret, ret); 1181 *len = (int) strlen((const char *)ret); 1182 return(ret); 1183 } else if (remove_head) { 1184 *len -= remove_head; 1185 memmove(src, src + remove_head, 1 + *len); 1186 return(src); 1187 } 1188 return(NULL); 1189 } 1190 1191 
/** 1192 * xmlAddDefAttrs: 1193 * @ctxt: an XML parser context 1194 * @fullname: the element fullname 1195 * @fullattr: the attribute fullname 1196 * @value: the attribute value 1197 * 1198 * Add a defaulted attribute for an element 1199 */ 1200 static void 1201 xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1202 const xmlChar *fullname, 1203 const xmlChar *fullattr, 1204 const xmlChar *value) { 1205 xmlDefAttrsPtr defaults; 1206 int len; 1207 const xmlChar *name; 1208 const xmlChar *prefix; 1209 1210 /* 1211 * Allows to detect attribute redefinitions 1212 */ 1213 if (ctxt->attsSpecial != NULL) { 1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1215 return; 1216 } 1217 1218 if (ctxt->attsDefault == NULL) { 1219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1220 if (ctxt->attsDefault == NULL) 1221 goto mem_error; 1222 } 1223 1224 /* 1225 * split the element name into prefix:localname , the string found 1226 * are within the DTD and then not associated to namespace names. 
1227 */ 1228 name = xmlSplitQName3(fullname, &len); 1229 if (name == NULL) { 1230 name = xmlDictLookup(ctxt->dict, fullname, -1); 1231 prefix = NULL; 1232 } else { 1233 name = xmlDictLookup(ctxt->dict, name, -1); 1234 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1235 } 1236 1237 /* 1238 * make sure there is some storage 1239 */ 1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1241 if (defaults == NULL) { 1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1243 (4 * 5) * sizeof(const xmlChar *)); 1244 if (defaults == NULL) 1245 goto mem_error; 1246 defaults->nbAttrs = 0; 1247 defaults->maxAttrs = 4; 1248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1249 defaults, NULL) < 0) { 1250 xmlFree(defaults); 1251 goto mem_error; 1252 } 1253 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1254 xmlDefAttrsPtr temp; 1255 1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1258 if (temp == NULL) 1259 goto mem_error; 1260 defaults = temp; 1261 defaults->maxAttrs *= 2; 1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1263 defaults, NULL) < 0) { 1264 xmlFree(defaults); 1265 goto mem_error; 1266 } 1267 } 1268 1269 /* 1270 * Split the element name into prefix:localname , the string found 1271 * are within the DTD and hen not associated to namespace names. 
1272 */ 1273 name = xmlSplitQName3(fullattr, &len); 1274 if (name == NULL) { 1275 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1276 prefix = NULL; 1277 } else { 1278 name = xmlDictLookup(ctxt->dict, name, -1); 1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1280 } 1281 1282 defaults->values[5 * defaults->nbAttrs] = name; 1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1284 /* intern the string and precompute the end */ 1285 len = xmlStrlen(value); 1286 value = xmlDictLookup(ctxt->dict, value, len); 1287 defaults->values[5 * defaults->nbAttrs + 2] = value; 1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1289 if (ctxt->external) 1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1291 else 1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1293 defaults->nbAttrs++; 1294 1295 return; 1296 1297 mem_error: 1298 xmlErrMemory(ctxt, NULL); 1299 return; 1300 } 1301 1302 /** 1303 * xmlAddSpecialAttr: 1304 * @ctxt: an XML parser context 1305 * @fullname: the element fullname 1306 * @fullattr: the attribute fullname 1307 * @type: the attribute type 1308 * 1309 * Register this attribute type 1310 */ 1311 static void 1312 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1313 const xmlChar *fullname, 1314 const xmlChar *fullattr, 1315 int type) 1316 { 1317 if (ctxt->attsSpecial == NULL) { 1318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1319 if (ctxt->attsSpecial == NULL) 1320 goto mem_error; 1321 } 1322 1323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1324 return; 1325 1326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1327 (void *) (long) type); 1328 return; 1329 1330 mem_error: 1331 xmlErrMemory(ctxt, NULL); 1332 return; 1333 } 1334 1335 /** 1336 * xmlCleanSpecialAttrCallback: 1337 * 1338 * Removes CDATA attributes from the special attribute table 1339 */ 1340 static void 1341 xmlCleanSpecialAttrCallback(void *payload, void *data, 1342 const xmlChar *fullname, const xmlChar 
*fullattr, 1343 const xmlChar *unused ATTRIBUTE_UNUSED) { 1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1345 1346 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1348 } 1349 } 1350 1351 /** 1352 * xmlCleanSpecialAttr: 1353 * @ctxt: an XML parser context 1354 * 1355 * Trim the list of attributes defined to remove all those of type 1356 * CDATA as they are not special. This call should be done when finishing 1357 * to parse the DTD and before starting to parse the document root. 1358 */ 1359 static void 1360 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1361 { 1362 if (ctxt->attsSpecial == NULL) 1363 return; 1364 1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1366 1367 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1368 xmlHashFree(ctxt->attsSpecial, NULL); 1369 ctxt->attsSpecial = NULL; 1370 } 1371 return; 1372 } 1373 1374 /** 1375 * xmlCheckLanguageID: 1376 * @lang: pointer to the string value 1377 * 1378 * Checks that the value conforms to the LanguageID production: 1379 * 1380 * NOTE: this is somewhat deprecated, those productions were removed from 1381 * the XML Second edition. 
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * As implemented, a subtag is a run of ASCII letters and its kind is
 * decided by its length alone: 3 = extlang, 4 = script, 2 = region,
 * 5 to 8 = variant; a subtag starting with a digit must be a 3 digit
 * region. At most one extlang and one variant are examined; whatever
 * follows a variant and a '-' is accepted unchecked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /* cur marks the start of the subtag being examined, nxt scans to its end */
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         *
         * Only letters may follow the two character head, up to the end
         * of the string; the loop also accepts an empty run.
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        return(cur[0] == 0);
    }
    /* measure the leading run of letters: the primary language subtag */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved
         *
         * 4 to 8 letters are accepted only when they make up the whole
         * string, no further subtag is allowed after them.
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    /* the length of the letter run decides which subtag this is */
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * Reached with nxt[0] already known to be a digit: two more digits are
     * required. The && chain stops at the first non-digit, so nxt[2] is
     * only read when nxt[1] is a digit.
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}

/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/

static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);

#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the index in the stack otherwise.
1588 */ 1589 static int 1590 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1591 { 1592 if (ctxt->options & XML_PARSE_NSCLEAN) { 1593 int i; 1594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { 1595 if (ctxt->nsTab[i] == prefix) { 1596 /* in scope */ 1597 if (ctxt->nsTab[i + 1] == URL) 1598 return(-2); 1599 /* out of scope keep it */ 1600 break; 1601 } 1602 } 1603 } 1604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1605 ctxt->nsMax = 10; 1606 ctxt->nsNr = 0; 1607 ctxt->nsTab = (const xmlChar **) 1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1609 if (ctxt->nsTab == NULL) { 1610 xmlErrMemory(ctxt, NULL); 1611 ctxt->nsMax = 0; 1612 return (-1); 1613 } 1614 } else if (ctxt->nsNr >= ctxt->nsMax) { 1615 const xmlChar ** tmp; 1616 ctxt->nsMax *= 2; 1617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1618 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1619 if (tmp == NULL) { 1620 xmlErrMemory(ctxt, NULL); 1621 ctxt->nsMax /= 2; 1622 return (-1); 1623 } 1624 ctxt->nsTab = tmp; 1625 } 1626 ctxt->nsTab[ctxt->nsNr++] = prefix; 1627 ctxt->nsTab[ctxt->nsNr++] = URL; 1628 return (ctxt->nsNr); 1629 } 1630 /** 1631 * nsPop: 1632 * @ctxt: an XML parser context 1633 * @nr: the number to pop 1634 * 1635 * Pops the top @nr parser prefix/namespace from the ns stack 1636 * 1637 * Returns the number of namespaces removed 1638 */ 1639 static int 1640 nsPop(xmlParserCtxtPtr ctxt, int nr) 1641 { 1642 int i; 1643 1644 if (ctxt->nsTab == NULL) return(0); 1645 if (ctxt->nsNr < nr) { 1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1647 nr = ctxt->nsNr; 1648 } 1649 if (ctxt->nsNr <= 0) 1650 return (0); 1651 1652 for (i = 0;i < nr;i++) { 1653 ctxt->nsNr--; 1654 ctxt->nsTab[ctxt->nsNr] = NULL; 1655 } 1656 return(nr); 1657 } 1658 #endif 1659 1660 static int 1661 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1662 const xmlChar **atts; 1663 int *attallocs; 1664 int maxatts; 1665 1666 if (ctxt->atts == NULL) { 1667 maxatts = 55; /* allow for 
10 attrs by default */ 1668 atts = (const xmlChar **) 1669 xmlMalloc(maxatts * sizeof(xmlChar *)); 1670 if (atts == NULL) goto mem_error; 1671 ctxt->atts = atts; 1672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1673 if (attallocs == NULL) goto mem_error; 1674 ctxt->attallocs = attallocs; 1675 ctxt->maxatts = maxatts; 1676 } else if (nr + 5 > ctxt->maxatts) { 1677 maxatts = (nr + 5) * 2; 1678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1679 maxatts * sizeof(const xmlChar *)); 1680 if (atts == NULL) goto mem_error; 1681 ctxt->atts = atts; 1682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1683 (maxatts / 5) * sizeof(int)); 1684 if (attallocs == NULL) goto mem_error; 1685 ctxt->attallocs = attallocs; 1686 ctxt->maxatts = maxatts; 1687 } 1688 return(ctxt->maxatts); 1689 mem_error: 1690 xmlErrMemory(ctxt, NULL); 1691 return(-1); 1692 } 1693 1694 /** 1695 * inputPush: 1696 * @ctxt: an XML parser context 1697 * @value: the parser input 1698 * 1699 * Pushes a new parser input on top of the input stack 1700 * 1701 * Returns -1 in case of error, the index in the stack otherwise 1702 */ 1703 int 1704 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1705 { 1706 if ((ctxt == NULL) || (value == NULL)) 1707 return(-1); 1708 if (ctxt->inputNr >= ctxt->inputMax) { 1709 ctxt->inputMax *= 2; 1710 ctxt->inputTab = 1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1712 ctxt->inputMax * 1713 sizeof(ctxt->inputTab[0])); 1714 if (ctxt->inputTab == NULL) { 1715 xmlErrMemory(ctxt, NULL); 1716 xmlFreeInputStream(value); 1717 ctxt->inputMax /= 2; 1718 value = NULL; 1719 return (-1); 1720 } 1721 } 1722 ctxt->inputTab[ctxt->inputNr] = value; 1723 ctxt->input = value; 1724 return (ctxt->inputNr++); 1725 } 1726 /** 1727 * inputPop: 1728 * @ctxt: an XML parser context 1729 * 1730 * Pops the top parser input from the input stack 1731 * 1732 * Returns the input just removed 1733 */ 1734 xmlParserInputPtr 1735 inputPop(xmlParserCtxtPtr ctxt) 1736 
{ 1737 xmlParserInputPtr ret; 1738 1739 if (ctxt == NULL) 1740 return(NULL); 1741 if (ctxt->inputNr <= 0) 1742 return (NULL); 1743 ctxt->inputNr--; 1744 if (ctxt->inputNr > 0) 1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1746 else 1747 ctxt->input = NULL; 1748 ret = ctxt->inputTab[ctxt->inputNr]; 1749 ctxt->inputTab[ctxt->inputNr] = NULL; 1750 return (ret); 1751 } 1752 /** 1753 * nodePush: 1754 * @ctxt: an XML parser context 1755 * @value: the element node 1756 * 1757 * Pushes a new element node on top of the node stack 1758 * 1759 * Returns -1 in case of error, the index in the stack otherwise 1760 */ 1761 int 1762 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1763 { 1764 if (ctxt == NULL) return(0); 1765 if (ctxt->nodeNr >= ctxt->nodeMax) { 1766 xmlNodePtr *tmp; 1767 1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1769 ctxt->nodeMax * 2 * 1770 sizeof(ctxt->nodeTab[0])); 1771 if (tmp == NULL) { 1772 xmlErrMemory(ctxt, NULL); 1773 return (-1); 1774 } 1775 ctxt->nodeTab = tmp; 1776 ctxt->nodeMax *= 2; 1777 } 1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1782 xmlParserMaxDepth); 1783 xmlHaltParser(ctxt); 1784 return(-1); 1785 } 1786 ctxt->nodeTab[ctxt->nodeNr] = value; 1787 ctxt->node = value; 1788 return (ctxt->nodeNr++); 1789 } 1790 1791 /** 1792 * nodePop: 1793 * @ctxt: an XML parser context 1794 * 1795 * Pops the top element node from the node stack 1796 * 1797 * Returns the node just removed 1798 */ 1799 xmlNodePtr 1800 nodePop(xmlParserCtxtPtr ctxt) 1801 { 1802 xmlNodePtr ret; 1803 1804 if (ctxt == NULL) return(NULL); 1805 if (ctxt->nodeNr <= 0) 1806 return (NULL); 1807 ctxt->nodeNr--; 1808 if (ctxt->nodeNr > 0) 1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1810 else 1811 ctxt->node = NULL; 1812 ret = ctxt->nodeTab[ctxt->nodeNr]; 1813 
ctxt->nodeTab[ctxt->nodeNr] = NULL; 1814 return (ret); 1815 } 1816 1817 #ifdef LIBXML_PUSH_ENABLED 1818 /** 1819 * nameNsPush: 1820 * @ctxt: an XML parser context 1821 * @value: the element name 1822 * @prefix: the element prefix 1823 * @URI: the element namespace name 1824 * 1825 * Pushes a new element name/prefix/URL on top of the name stack 1826 * 1827 * Returns -1 in case of error, the index in the stack otherwise 1828 */ 1829 static int 1830 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1831 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1832 { 1833 if (ctxt->nameNr >= ctxt->nameMax) { 1834 const xmlChar * *tmp; 1835 void **tmp2; 1836 ctxt->nameMax *= 2; 1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1838 ctxt->nameMax * 1839 sizeof(ctxt->nameTab[0])); 1840 if (tmp == NULL) { 1841 ctxt->nameMax /= 2; 1842 goto mem_error; 1843 } 1844 ctxt->nameTab = tmp; 1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1846 ctxt->nameMax * 3 * 1847 sizeof(ctxt->pushTab[0])); 1848 if (tmp2 == NULL) { 1849 ctxt->nameMax /= 2; 1850 goto mem_error; 1851 } 1852 ctxt->pushTab = tmp2; 1853 } 1854 ctxt->nameTab[ctxt->nameNr] = value; 1855 ctxt->name = value; 1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1859 return (ctxt->nameNr++); 1860 mem_error: 1861 xmlErrMemory(ctxt, NULL); 1862 return (-1); 1863 } 1864 /** 1865 * nameNsPop: 1866 * @ctxt: an XML parser context 1867 * 1868 * Pops the top element/prefix/URI name from the name stack 1869 * 1870 * Returns the name just removed 1871 */ 1872 static const xmlChar * 1873 nameNsPop(xmlParserCtxtPtr ctxt) 1874 { 1875 const xmlChar *ret; 1876 1877 if (ctxt->nameNr <= 0) 1878 return (NULL); 1879 ctxt->nameNr--; 1880 if (ctxt->nameNr > 0) 1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1882 else 1883 ctxt->name = NULL; 1884 ret = ctxt->nameTab[ctxt->nameNr]; 1885 
ctxt->nameTab[ctxt->nameNr] = NULL; 1886 return (ret); 1887 } 1888 #endif /* LIBXML_PUSH_ENABLED */ 1889 1890 /** 1891 * namePush: 1892 * @ctxt: an XML parser context 1893 * @value: the element name 1894 * 1895 * Pushes a new element name on top of the name stack 1896 * 1897 * Returns -1 in case of error, the index in the stack otherwise 1898 */ 1899 int 1900 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1901 { 1902 if (ctxt == NULL) return (-1); 1903 1904 if (ctxt->nameNr >= ctxt->nameMax) { 1905 const xmlChar * *tmp; 1906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1907 ctxt->nameMax * 2 * 1908 sizeof(ctxt->nameTab[0])); 1909 if (tmp == NULL) { 1910 goto mem_error; 1911 } 1912 ctxt->nameTab = tmp; 1913 ctxt->nameMax *= 2; 1914 } 1915 ctxt->nameTab[ctxt->nameNr] = value; 1916 ctxt->name = value; 1917 return (ctxt->nameNr++); 1918 mem_error: 1919 xmlErrMemory(ctxt, NULL); 1920 return (-1); 1921 } 1922 /** 1923 * namePop: 1924 * @ctxt: an XML parser context 1925 * 1926 * Pops the top element name from the name stack 1927 * 1928 * Returns the name just removed 1929 */ 1930 const xmlChar * 1931 namePop(xmlParserCtxtPtr ctxt) 1932 { 1933 const xmlChar *ret; 1934 1935 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1936 return (NULL); 1937 ctxt->nameNr--; 1938 if (ctxt->nameNr > 0) 1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1940 else 1941 ctxt->name = NULL; 1942 ret = ctxt->nameTab[ctxt->nameNr]; 1943 ctxt->nameTab[ctxt->nameNr] = NULL; 1944 return (ret); 1945 } 1946 1947 static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1948 if (ctxt->spaceNr >= ctxt->spaceMax) { 1949 int *tmp; 1950 1951 ctxt->spaceMax *= 2; 1952 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1954 if (tmp == NULL) { 1955 xmlErrMemory(ctxt, NULL); 1956 ctxt->spaceMax /=2; 1957 return(-1); 1958 } 1959 ctxt->spaceTab = tmp; 1960 } 1961 ctxt->spaceTab[ctxt->spaceNr] = val; 1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 
1963 return(ctxt->spaceNr++); 1964 } 1965 1966 static int spacePop(xmlParserCtxtPtr ctxt) { 1967 int ret; 1968 if (ctxt->spaceNr <= 0) return(0); 1969 ctxt->spaceNr--; 1970 if (ctxt->spaceNr > 0) 1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1972 else 1973 ctxt->space = &ctxt->spaceTab[0]; 1974 ret = ctxt->spaceTab[ctxt->spaceNr]; 1975 ctxt->spaceTab[ctxt->spaceNr] = -1; 1976 return(ret); 1977 } 1978 1979 /* 1980 * Macros for accessing the content. Those should be used only by the parser, 1981 * and not exported. 1982 * 1983 * Dirty macros, i.e. one often need to make assumption on the context to 1984 * use them 1985 * 1986 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1987 * To be used with extreme caution since operations consuming 1988 * characters may move the input buffer to a different location ! 1989 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1990 * This should be used internally by the parser 1991 * only to compare to ASCII values otherwise it would break when 1992 * running with UTF-8 encoding. 1993 * RAW same as CUR but in the input buffer, bypass any token 1994 * extraction that may have been done 1995 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1996 * to compare on ASCII based substring. 1997 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1998 * strings without newlines within the parser. 1999 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 2000 * defined char within the parser. 2001 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 2002 * 2003 * NEXT Skip to the next character, this does the proper decoding 2004 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 2005 * NEXTL(l) Skip the current unicode character of l xmlChars long. 2006 * CUR_CHAR(l) returns the current unicode character (int), set l 2007 * to the number of xmlChars used for the encoding [0-5]. 
2008 * CUR_SCHAR same but operate on a string instead of the context 2009 * COPY_BUF copy the current unicode char to the target buffer, increment 2010 * the index 2011 * GROW, SHRINK handling of input buffers 2012 */ 2013 2014 #define RAW (*ctxt->input->cur) 2015 #define CUR (*ctxt->input->cur) 2016 #define NXT(val) ctxt->input->cur[(val)] 2017 #define CUR_PTR ctxt->input->cur 2018 #define BASE_PTR ctxt->input->base 2019 2020 #define CMP4( s, c1, c2, c3, c4 ) \ 2021 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 2022 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 2023 #define CMP5( s, c1, c2, c3, c4, c5 ) \ 2024 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 2025 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 2026 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 2027 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 2028 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 2029 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 2030 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 2031 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 2032 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 2033 ((unsigned char *) s)[ 8 ] == c9 ) 2034 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 2035 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 2036 ((unsigned char *) s)[ 9 ] == c10 ) 2037 2038 #define SKIP(val) do { \ 2039 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 2040 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2041 if ((*ctxt->input->cur == 0) && \ 2042 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2043 xmlPopInput(ctxt); \ 2044 } while (0) 2045 2046 #define SKIPL(val) do { \ 2047 int skipl; \ 2048 for(skipl=0; skipl<val; skipl++) { \ 2049 if (*(ctxt->input->cur) == '\n') { \ 2050 ctxt->input->line++; ctxt->input->col = 1; \ 2051 } else 
ctxt->input->col++; \ 2052 ctxt->nbChars++; \ 2053 ctxt->input->cur++; \ 2054 } \ 2055 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2056 if ((*ctxt->input->cur == 0) && \ 2057 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 2058 xmlPopInput(ctxt); \ 2059 } while (0) 2060 2061 #define SHRINK if ((ctxt->progressive == 0) && \ 2062 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 2063 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 2064 xmlSHRINK (ctxt); 2065 2066 static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 2067 xmlParserInputShrink(ctxt->input); 2068 if ((*ctxt->input->cur == 0) && 2069 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2070 xmlPopInput(ctxt); 2071 } 2072 2073 #define GROW if ((ctxt->progressive == 0) && \ 2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 2075 xmlGROW (ctxt); 2076 2077 static void xmlGROW (xmlParserCtxtPtr ctxt) { 2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur; 2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base; 2080 2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || 2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && 2083 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && 2084 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); 2086 xmlHaltParser(ctxt); 2087 return; 2088 } 2089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2090 if ((ctxt->input->cur > ctxt->input->end) || 2091 (ctxt->input->cur < ctxt->input->base)) { 2092 xmlHaltParser(ctxt); 2093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); 2094 return; 2095 } 2096 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 2097 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2098 xmlPopInput(ctxt); 2099 } 2100 2101 #define SKIP_BLANKS xmlSkipBlankChars(ctxt) 2102 2103 #define NEXT xmlNextChar(ctxt) 2104 2105 #define NEXT1 { \ 2106 
ctxt->input->col++; \ 2107 ctxt->input->cur++; \ 2108 ctxt->nbChars++; \ 2109 if (*ctxt->input->cur == 0) \ 2110 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 2111 } 2112 2113 #define NEXTL(l) do { \ 2114 if (*(ctxt->input->cur) == '\n') { \ 2115 ctxt->input->line++; ctxt->input->col = 1; \ 2116 } else ctxt->input->col++; \ 2117 ctxt->input->cur += l; \ 2118 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 2119 } while (0) 2120 2121 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 2122 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 2123 2124 #define COPY_BUF(l,b,i,v) \ 2125 if (l == 1) b[i++] = (xmlChar) v; \ 2126 else i += xmlCopyCharMultiByte(&b[i],v) 2127 2128 /** 2129 * xmlSkipBlankChars: 2130 * @ctxt: the XML parser context 2131 * 2132 * skip all blanks character found at that point in the input streams. 2133 * It pops up finished entities in the process if allowable at that point. 2134 * 2135 * Returns the number of space chars skipped 2136 */ 2137 2138 int 2139 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 2140 int res = 0; 2141 2142 /* 2143 * It's Okay to use CUR/NEXT here since all the blanks are on 2144 * the ASCII range. 
2145 */ 2146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 2147 const xmlChar *cur; 2148 /* 2149 * if we are in the document content, go really fast 2150 */ 2151 cur = ctxt->input->cur; 2152 while (IS_BLANK_CH(*cur)) { 2153 if (*cur == '\n') { 2154 ctxt->input->line++; ctxt->input->col = 1; 2155 } else { 2156 ctxt->input->col++; 2157 } 2158 cur++; 2159 res++; 2160 if (*cur == 0) { 2161 ctxt->input->cur = cur; 2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 2163 cur = ctxt->input->cur; 2164 } 2165 } 2166 ctxt->input->cur = cur; 2167 } else { 2168 int cur; 2169 do { 2170 cur = CUR; 2171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */ 2172 (ctxt->instate != XML_PARSER_EOF))) { 2173 NEXT; 2174 cur = CUR; 2175 res++; 2176 } 2177 while ((cur == 0) && (ctxt->inputNr > 1) && 2178 (ctxt->instate != XML_PARSER_COMMENT)) { 2179 xmlPopInput(ctxt); 2180 cur = CUR; 2181 } 2182 /* 2183 * Need to handle support of entities branching here 2184 */ 2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 2186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */ 2187 (ctxt->instate != XML_PARSER_EOF)); 2188 } 2189 return(res); 2190 } 2191 2192 /************************************************************************ 2193 * * 2194 * Commodity functions to handle entities * 2195 * * 2196 ************************************************************************/ 2197 2198 /** 2199 * xmlPopInput: 2200 * @ctxt: an XML parser context 2201 * 2202 * xmlPopInput: the current input pointed by ctxt->input came to an end 2203 * pop it and return the next char. 
2204 * 2205 * Returns the current xmlChar in the parser context 2206 */ 2207 xmlChar 2208 xmlPopInput(xmlParserCtxtPtr ctxt) { 2209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 2210 if (xmlParserDebugEntities) 2211 xmlGenericError(xmlGenericErrorContext, 2212 "Popping input %d\n", ctxt->inputNr); 2213 xmlFreeInputStream(inputPop(ctxt)); 2214 if ((*ctxt->input->cur == 0) && 2215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 2216 return(xmlPopInput(ctxt)); 2217 return(CUR); 2218 } 2219 2220 /** 2221 * xmlPushInput: 2222 * @ctxt: an XML parser context 2223 * @input: an XML parser input fragment (entity, XML fragment ...). 2224 * 2225 * xmlPushInput: switch to a new input stream which is stacked on top 2226 * of the previous one(s). 2227 * Returns -1 in case of error or the index in the input stack 2228 */ 2229 int 2230 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 2231 int ret; 2232 if (input == NULL) return(-1); 2233 2234 if (xmlParserDebugEntities) { 2235 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2236 xmlGenericError(xmlGenericErrorContext, 2237 "%s(%d): ", ctxt->input->filename, 2238 ctxt->input->line); 2239 xmlGenericError(xmlGenericErrorContext, 2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2241 } 2242 ret = inputPush(ctxt, input); 2243 if (ctxt->instate == XML_PARSER_EOF) 2244 return(-1); 2245 GROW; 2246 return(ret); 2247 } 2248 2249 /** 2250 * xmlParseCharRef: 2251 * @ctxt: an XML parser context 2252 * 2253 * parse Reference declarations 2254 * 2255 * [66] CharRef ::= '&#' [0-9]+ ';' | 2256 * '&#x' [0-9a-fA-F]+ ';' 2257 * 2258 * [ WFC: Legal Character ] 2259 * Characters referred to using character references must match the 2260 * production for Char. 
2261 * 2262 * Returns the value parsed (as an int), 0 in case of error 2263 */ 2264 int 2265 xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2266 unsigned int val = 0; 2267 int count = 0; 2268 unsigned int outofrange = 0; 2269 2270 /* 2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2272 */ 2273 if ((RAW == '&') && (NXT(1) == '#') && 2274 (NXT(2) == 'x')) { 2275 SKIP(3); 2276 GROW; 2277 while (RAW != ';') { /* loop blocked by count */ 2278 if (count++ > 20) { 2279 count = 0; 2280 GROW; 2281 if (ctxt->instate == XML_PARSER_EOF) 2282 return(0); 2283 } 2284 if ((RAW >= '0') && (RAW <= '9')) 2285 val = val * 16 + (CUR - '0'); 2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2287 val = val * 16 + (CUR - 'a') + 10; 2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2289 val = val * 16 + (CUR - 'A') + 10; 2290 else { 2291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2292 val = 0; 2293 break; 2294 } 2295 if (val > 0x10FFFF) 2296 outofrange = val; 2297 2298 NEXT; 2299 count++; 2300 } 2301 if (RAW == ';') { 2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2303 ctxt->input->col++; 2304 ctxt->nbChars ++; 2305 ctxt->input->cur++; 2306 } 2307 } else if ((RAW == '&') && (NXT(1) == '#')) { 2308 SKIP(2); 2309 GROW; 2310 while (RAW != ';') { /* loop blocked by count */ 2311 if (count++ > 20) { 2312 count = 0; 2313 GROW; 2314 if (ctxt->instate == XML_PARSER_EOF) 2315 return(0); 2316 } 2317 if ((RAW >= '0') && (RAW <= '9')) 2318 val = val * 10 + (CUR - '0'); 2319 else { 2320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2321 val = 0; 2322 break; 2323 } 2324 if (val > 0x10FFFF) 2325 outofrange = val; 2326 2327 NEXT; 2328 count++; 2329 } 2330 if (RAW == ';') { 2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2332 ctxt->input->col++; 2333 ctxt->nbChars ++; 2334 ctxt->input->cur++; 2335 } 2336 } else { 2337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2338 } 2339 2340 /* 2341 * [ WFC: 
Legal Character ] 2342 * Characters referred to using character references must match the 2343 * production for Char. 2344 */ 2345 if ((IS_CHAR(val) && (outofrange == 0))) { 2346 return(val); 2347 } else { 2348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2349 "xmlParseCharRef: invalid xmlChar value %d\n", 2350 val); 2351 } 2352 return(0); 2353 } 2354 2355 /** 2356 * xmlParseStringCharRef: 2357 * @ctxt: an XML parser context 2358 * @str: a pointer to an index in the string 2359 * 2360 * parse Reference declarations, variant parsing from a string rather 2361 * than an an input flow. 2362 * 2363 * [66] CharRef ::= '&#' [0-9]+ ';' | 2364 * '&#x' [0-9a-fA-F]+ ';' 2365 * 2366 * [ WFC: Legal Character ] 2367 * Characters referred to using character references must match the 2368 * production for Char. 2369 * 2370 * Returns the value parsed (as an int), 0 in case of error, str will be 2371 * updated to the current value of the index 2372 */ 2373 static int 2374 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2375 const xmlChar *ptr; 2376 xmlChar cur; 2377 unsigned int val = 0; 2378 unsigned int outofrange = 0; 2379 2380 if ((str == NULL) || (*str == NULL)) return(0); 2381 ptr = *str; 2382 cur = *ptr; 2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2384 ptr += 3; 2385 cur = *ptr; 2386 while (cur != ';') { /* Non input consuming loop */ 2387 if ((cur >= '0') && (cur <= '9')) 2388 val = val * 16 + (cur - '0'); 2389 else if ((cur >= 'a') && (cur <= 'f')) 2390 val = val * 16 + (cur - 'a') + 10; 2391 else if ((cur >= 'A') && (cur <= 'F')) 2392 val = val * 16 + (cur - 'A') + 10; 2393 else { 2394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2395 val = 0; 2396 break; 2397 } 2398 if (val > 0x10FFFF) 2399 outofrange = val; 2400 2401 ptr++; 2402 cur = *ptr; 2403 } 2404 if (cur == ';') 2405 ptr++; 2406 } else if ((cur == '&') && (ptr[1] == '#')){ 2407 ptr += 2; 2408 cur = *ptr; 2409 while (cur != ';') { /* Non input consuming loops */ 2410 
if ((cur >= '0') && (cur <= '9')) 2411 val = val * 10 + (cur - '0'); 2412 else { 2413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2414 val = 0; 2415 break; 2416 } 2417 if (val > 0x10FFFF) 2418 outofrange = val; 2419 2420 ptr++; 2421 cur = *ptr; 2422 } 2423 if (cur == ';') 2424 ptr++; 2425 } else { 2426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2427 return(0); 2428 } 2429 *str = ptr; 2430 2431 /* 2432 * [ WFC: Legal Character ] 2433 * Characters referred to using character references must match the 2434 * production for Char. 2435 */ 2436 if ((IS_CHAR(val) && (outofrange == 0))) { 2437 return(val); 2438 } else { 2439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2440 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2441 val); 2442 } 2443 return(0); 2444 } 2445 2446 /** 2447 * xmlNewBlanksWrapperInputStream: 2448 * @ctxt: an XML parser context 2449 * @entity: an Entity pointer 2450 * 2451 * Create a new input stream for wrapping 2452 * blanks around a PEReference 2453 * 2454 * Returns the new input stream or NULL 2455 */ 2456 2457 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2458 2459 static xmlParserInputPtr 2460 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2461 xmlParserInputPtr input; 2462 xmlChar *buffer; 2463 size_t length; 2464 if (entity == NULL) { 2465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2466 "xmlNewBlanksWrapperInputStream entity\n"); 2467 return(NULL); 2468 } 2469 if (xmlParserDebugEntities) 2470 xmlGenericError(xmlGenericErrorContext, 2471 "new blanks wrapper for entity: %s\n", entity->name); 2472 input = xmlNewInputStream(ctxt); 2473 if (input == NULL) { 2474 return(NULL); 2475 } 2476 length = xmlStrlen(entity->name) + 5; 2477 buffer = xmlMallocAtomic(length); 2478 if (buffer == NULL) { 2479 xmlErrMemory(ctxt, NULL); 2480 xmlFree(input); 2481 return(NULL); 2482 } 2483 buffer [0] = ' '; 2484 buffer [1] = '%'; 2485 buffer [length-3] = ';'; 2486 buffer [length-2] = ' '; 2487 
buffer [length-1] = 0; 2488 memcpy(buffer + 2, entity->name, length - 5); 2489 input->free = deallocblankswrapper; 2490 input->base = buffer; 2491 input->cur = buffer; 2492 input->length = length; 2493 input->end = &buffer[length]; 2494 return(input); 2495 } 2496 2497 /** 2498 * xmlParserHandlePEReference: 2499 * @ctxt: the parser context 2500 * 2501 * [69] PEReference ::= '%' Name ';' 2502 * 2503 * [ WFC: No Recursion ] 2504 * A parsed entity must not contain a recursive 2505 * reference to itself, either directly or indirectly. 2506 * 2507 * [ WFC: Entity Declared ] 2508 * In a document without any DTD, a document with only an internal DTD 2509 * subset which contains no parameter entity references, or a document 2510 * with "standalone='yes'", ... ... The declaration of a parameter 2511 * entity must precede any reference to it... 2512 * 2513 * [ VC: Entity Declared ] 2514 * In a document with an external subset or external parameter entities 2515 * with "standalone='no'", ... ... The declaration of a parameter entity 2516 * must precede any reference to it... 2517 * 2518 * [ WFC: In DTD ] 2519 * Parameter-entity references may only appear in the DTD. 2520 * NOTE: misleading but this is handled. 2521 * 2522 * A PEReference may have been detected in the current input stream 2523 * the handling is done accordingly to 2524 * http://www.w3.org/TR/REC-xml#entproc 2525 * i.e. 
2526 * - Included in literal in entity values 2527 * - Included as Parameter Entity reference within DTDs 2528 */ 2529 void 2530 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2531 const xmlChar *name; 2532 xmlEntityPtr entity = NULL; 2533 xmlParserInputPtr input; 2534 2535 if (RAW != '%') return; 2536 switch(ctxt->instate) { 2537 case XML_PARSER_CDATA_SECTION: 2538 return; 2539 case XML_PARSER_COMMENT: 2540 return; 2541 case XML_PARSER_START_TAG: 2542 return; 2543 case XML_PARSER_END_TAG: 2544 return; 2545 case XML_PARSER_EOF: 2546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2547 return; 2548 case XML_PARSER_PROLOG: 2549 case XML_PARSER_START: 2550 case XML_PARSER_MISC: 2551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2552 return; 2553 case XML_PARSER_ENTITY_DECL: 2554 case XML_PARSER_CONTENT: 2555 case XML_PARSER_ATTRIBUTE_VALUE: 2556 case XML_PARSER_PI: 2557 case XML_PARSER_SYSTEM_LITERAL: 2558 case XML_PARSER_PUBLIC_LITERAL: 2559 /* we just ignore it there */ 2560 return; 2561 case XML_PARSER_EPILOG: 2562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2563 return; 2564 case XML_PARSER_ENTITY_VALUE: 2565 /* 2566 * NOTE: in the case of entity values, we don't do the 2567 * substitution here since we need the literal 2568 * entity value to be able to save the internal 2569 * subset of the document. 2570 * This will be handled by xmlStringDecodeEntities 2571 */ 2572 return; 2573 case XML_PARSER_DTD: 2574 /* 2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2576 * In the internal DTD subset, parameter-entity references 2577 * can occur only where markup declarations can occur, not 2578 * within markup declarations. 
2579 * In that case this is handled in xmlParseMarkupDecl 2580 */ 2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2582 return; 2583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2584 return; 2585 break; 2586 case XML_PARSER_IGNORE: 2587 return; 2588 } 2589 2590 NEXT; 2591 name = xmlParseName(ctxt); 2592 if (xmlParserDebugEntities) 2593 xmlGenericError(xmlGenericErrorContext, 2594 "PEReference: %s\n", name); 2595 if (name == NULL) { 2596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2597 } else { 2598 if (RAW == ';') { 2599 NEXT; 2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2602 if (ctxt->instate == XML_PARSER_EOF) 2603 return; 2604 if (entity == NULL) { 2605 2606 /* 2607 * [ WFC: Entity Declared ] 2608 * In a document without any DTD, a document with only an 2609 * internal DTD subset which contains no parameter entity 2610 * references, or a document with "standalone='yes'", ... 2611 * ... The declaration of a parameter entity must precede 2612 * any reference to it... 2613 */ 2614 if ((ctxt->standalone == 1) || 2615 ((ctxt->hasExternalSubset == 0) && 2616 (ctxt->hasPErefs == 0))) { 2617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2618 "PEReference: %%%s; not found\n", name); 2619 } else { 2620 /* 2621 * [ VC: Entity Declared ] 2622 * In a document with an external subset or external 2623 * parameter entities with "standalone='no'", ... 2624 * ... The declaration of a parameter entity must precede 2625 * any reference to it... 
2626 */ 2627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2629 "PEReference: %%%s; not found\n", 2630 name, NULL); 2631 } else 2632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2633 "PEReference: %%%s; not found\n", 2634 name, NULL); 2635 ctxt->valid = 0; 2636 } 2637 xmlParserEntityCheck(ctxt, 0, NULL, 0); 2638 } else if (ctxt->input->free != deallocblankswrapper) { 2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2640 if (xmlPushInput(ctxt, input) < 0) 2641 return; 2642 } else { 2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2645 xmlChar start[4]; 2646 xmlCharEncoding enc; 2647 2648 /* 2649 * Note: external parameter entities will not be loaded, it 2650 * is not required for a non-validating parser, unless the 2651 * option of validating, or substituting entities were 2652 * given. Doing so is far more secure as the parser will 2653 * only process data coming from the document entity by 2654 * default. 2655 */ 2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2657 ((ctxt->options & XML_PARSE_NOENT) == 0) && 2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) && 2659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && 2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) && 2661 (ctxt->replaceEntities == 0) && 2662 (ctxt->validate == 0)) 2663 return; 2664 2665 /* 2666 * handle the extra spaces added before and after 2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2668 * this is done independently. 2669 */ 2670 input = xmlNewEntityInputStream(ctxt, entity); 2671 if (xmlPushInput(ctxt, input) < 0) 2672 return; 2673 2674 /* 2675 * Get the 4 first bytes and decode the charset 2676 * if enc != XML_CHAR_ENCODING_NONE 2677 * plug some encoding conversion routines. 
2678 * Note that, since we may have some non-UTF8 2679 * encoding (like UTF16, bug 135229), the 'length' 2680 * is not known, but we can calculate based upon 2681 * the amount of data in the buffer. 2682 */ 2683 GROW 2684 if (ctxt->instate == XML_PARSER_EOF) 2685 return; 2686 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2687 start[0] = RAW; 2688 start[1] = NXT(1); 2689 start[2] = NXT(2); 2690 start[3] = NXT(3); 2691 enc = xmlDetectCharEncoding(start, 4); 2692 if (enc != XML_CHAR_ENCODING_NONE) { 2693 xmlSwitchEncoding(ctxt, enc); 2694 } 2695 } 2696 2697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2699 (IS_BLANK_CH(NXT(5)))) { 2700 xmlParseTextDecl(ctxt); 2701 } 2702 } else { 2703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2704 "PEReference: %s is not a parameter entity\n", 2705 name); 2706 } 2707 } 2708 } else { 2709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2710 } 2711 } 2712 } 2713 2714 /* 2715 * Macro used to grow the current buffer. 2716 * buffer##_size is expected to be a size_t 2717 * mem_error: is expected to handle memory allocation failures 2718 */ 2719 #define growBuffer(buffer, n) { \ 2720 xmlChar *tmp; \ 2721 size_t new_size = buffer##_size * 2 + n; \ 2722 if (new_size < buffer##_size) goto mem_error; \ 2723 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ 2724 if (tmp == NULL) goto mem_error; \ 2725 buffer = tmp; \ 2726 buffer##_size = new_size; \ 2727 } 2728 2729 /** 2730 * xmlStringLenDecodeEntities: 2731 * @ctxt: the parser context 2732 * @str: the input string 2733 * @len: the string length 2734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2735 * @end: an end marker xmlChar, 0 if none 2736 * @end2: an end marker xmlChar, 0 if none 2737 * @end3: an end marker xmlChar, 0 if none 2738 * 2739 * Takes a entity string content and process to do the adequate substitutions. 
2740 * 2741 * [67] Reference ::= EntityRef | CharRef 2742 * 2743 * [69] PEReference ::= '%' Name ';' 2744 * 2745 * Returns A newly allocated string with the substitution done. The caller 2746 * must deallocate it ! 2747 */ 2748 xmlChar * 2749 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2750 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2751 xmlChar *buffer = NULL; 2752 size_t buffer_size = 0; 2753 size_t nbchars = 0; 2754 2755 xmlChar *current = NULL; 2756 xmlChar *rep = NULL; 2757 const xmlChar *last; 2758 xmlEntityPtr ent; 2759 int c,l; 2760 2761 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2762 return(NULL); 2763 last = str + len; 2764 2765 if (((ctxt->depth > 40) && 2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2767 (ctxt->depth > 1024)) { 2768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2769 return(NULL); 2770 } 2771 2772 /* 2773 * allocate a translation buffer. 2774 */ 2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size); 2777 if (buffer == NULL) goto mem_error; 2778 2779 /* 2780 * OK loop until we reach one of the ending char or a size limit. 2781 * we are operating on already parsed values. 
2782 */ 2783 if (str < last) 2784 c = CUR_SCHAR(str, l); 2785 else 2786 c = 0; 2787 while ((c != 0) && (c != end) && /* non input consuming loop */ 2788 (c != end2) && (c != end3)) { 2789 2790 if (c == 0) break; 2791 if ((c == '&') && (str[1] == '#')) { 2792 int val = xmlParseStringCharRef(ctxt, &str); 2793 if (val != 0) { 2794 COPY_BUF(0,buffer,nbchars,val); 2795 } 2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2798 } 2799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2800 if (xmlParserDebugEntities) 2801 xmlGenericError(xmlGenericErrorContext, 2802 "String decoding Entity Reference: %.30s\n", 2803 str); 2804 ent = xmlParseStringEntityRef(ctxt, &str); 2805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2807 goto int_error; 2808 xmlParserEntityCheck(ctxt, 0, ent, 0); 2809 if (ent != NULL) 2810 ctxt->nbentities += ent->checked / 2; 2811 if ((ent != NULL) && 2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2813 if (ent->content != NULL) { 2814 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2817 } 2818 } else { 2819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2820 "predefined entity has no content\n"); 2821 } 2822 } else if ((ent != NULL) && (ent->content != NULL)) { 2823 ctxt->depth++; 2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2825 0, 0, 0); 2826 ctxt->depth--; 2827 2828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2830 goto int_error; 2831 2832 if (rep != NULL) { 2833 current = rep; 2834 while (*current != 0) { /* non input consuming loop */ 2835 buffer[nbchars++] = *current++; 2836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2838 goto int_error; 2839 growBuffer(buffer, 
XML_PARSER_BUFFER_SIZE); 2840 } 2841 } 2842 xmlFree(rep); 2843 rep = NULL; 2844 } 2845 } else if (ent != NULL) { 2846 int i = xmlStrlen(ent->name); 2847 const xmlChar *cur = ent->name; 2848 2849 buffer[nbchars++] = '&'; 2850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { 2851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); 2852 } 2853 for (;i > 0;i--) 2854 buffer[nbchars++] = *cur++; 2855 buffer[nbchars++] = ';'; 2856 } 2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2858 if (xmlParserDebugEntities) 2859 xmlGenericError(xmlGenericErrorContext, 2860 "String decoding PE Reference: %.30s\n", str); 2861 ent = xmlParseStringPEReference(ctxt, &str); 2862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2863 goto int_error; 2864 xmlParserEntityCheck(ctxt, 0, ent, 0); 2865 if (ent != NULL) 2866 ctxt->nbentities += ent->checked / 2; 2867 if (ent != NULL) { 2868 if (ent->content == NULL) { 2869 /* 2870 * Note: external parsed entities will not be loaded, 2871 * it is not required for a non-validating parser to 2872 * complete external PEreferences coming from the 2873 * internal subset 2874 */ 2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) || 2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) || 2877 (ctxt->validate != 0)) { 2878 xmlLoadEntityContent(ctxt, ent); 2879 } else { 2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING, 2881 "not validating will not read content for PE entity %s\n", 2882 ent->name, NULL); 2883 } 2884 } 2885 ctxt->depth++; 2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2887 0, 0, 0); 2888 ctxt->depth--; 2889 if (rep != NULL) { 2890 current = rep; 2891 while (*current != 0) { /* non input consuming loop */ 2892 buffer[nbchars++] = *current++; 2893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) 2895 goto int_error; 2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2897 } 2898 } 2899 xmlFree(rep); 2900 rep = NULL; 2901 } 2902 } 2903 } else { 2904 
COPY_BUF(l,buffer,nbchars,c); 2905 str += l; 2906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { 2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2908 } 2909 } 2910 if (str < last) 2911 c = CUR_SCHAR(str, l); 2912 else 2913 c = 0; 2914 } 2915 buffer[nbchars] = 0; 2916 return(buffer); 2917 2918 mem_error: 2919 xmlErrMemory(ctxt, NULL); 2920 int_error: 2921 if (rep != NULL) 2922 xmlFree(rep); 2923 if (buffer != NULL) 2924 xmlFree(buffer); 2925 return(NULL); 2926 } 2927 2928 /** 2929 * xmlStringDecodeEntities: 2930 * @ctxt: the parser context 2931 * @str: the input string 2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2933 * @end: an end marker xmlChar, 0 if none 2934 * @end2: an end marker xmlChar, 0 if none 2935 * @end3: an end marker xmlChar, 0 if none 2936 * 2937 * Takes a entity string content and process to do the adequate substitutions. 2938 * 2939 * [67] Reference ::= EntityRef | CharRef 2940 * 2941 * [69] PEReference ::= '%' Name ';' 2942 * 2943 * Returns A newly allocated string with the substitution done. The caller 2944 * must deallocate it ! 2945 */ 2946 xmlChar * 2947 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2948 xmlChar end, xmlChar end2, xmlChar end3) { 2949 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2951 end, end2, end3)); 2952 } 2953 2954 /************************************************************************ 2955 * * 2956 * Commodity functions, cleanup needed ? * 2957 * * 2958 ************************************************************************/ 2959 2960 /** 2961 * areBlanks: 2962 * @ctxt: an XML parser context 2963 * @str: a xmlChar * 2964 * @len: the size of @str 2965 * @blank_chars: we know the chars are blanks 2966 * 2967 * Is this a sequence of blank chars that one can ignore ? 2968 * 2969 * Returns 1 if ignorable 0 otherwise. 
2970 */ 2971 2972 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2973 int blank_chars) { 2974 int i, ret; 2975 xmlNodePtr lastChild; 2976 2977 /* 2978 * Don't spend time trying to differentiate them, the same callback is 2979 * used ! 2980 */ 2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2982 return(0); 2983 2984 /* 2985 * Check for xml:space value. 2986 */ 2987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2988 (*(ctxt->space) == -2)) 2989 return(0); 2990 2991 /* 2992 * Check that the string is made of blanks 2993 */ 2994 if (blank_chars == 0) { 2995 for (i = 0;i < len;i++) 2996 if (!(IS_BLANK_CH(str[i]))) return(0); 2997 } 2998 2999 /* 3000 * Look if the element is mixed content in the DTD if available 3001 */ 3002 if (ctxt->node == NULL) return(0); 3003 if (ctxt->myDoc != NULL) { 3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 3005 if (ret == 0) return(1); 3006 if (ret == 1) return(0); 3007 } 3008 3009 /* 3010 * Otherwise, heuristic :-\ 3011 */ 3012 if ((RAW != '<') && (RAW != 0xD)) return(0); 3013 if ((ctxt->node->children == NULL) && 3014 (RAW == '<') && (NXT(1) == '/')) return(0); 3015 3016 lastChild = xmlGetLastChild(ctxt->node); 3017 if (lastChild == NULL) { 3018 if ((ctxt->node->type != XML_ELEMENT_NODE) && 3019 (ctxt->node->content != NULL)) return(0); 3020 } else if (xmlNodeIsText(lastChild)) 3021 return(0); 3022 else if ((ctxt->node->children != NULL) && 3023 (xmlNodeIsText(ctxt->node->children))) 3024 return(0); 3025 return(1); 3026 } 3027 3028 /************************************************************************ 3029 * * 3030 * Extra stuff for namespace support * 3031 * Relates to http://www.w3.org/TR/WD-xml-names * 3032 * * 3033 ************************************************************************/ 3034 3035 /** 3036 * xmlSplitQName: 3037 * @ctxt: an XML parser context 3038 * @name: an XML parser context 3039 * @prefix: a xmlChar ** 3040 * 3041 * parse an UTF8 encoded XML 
qualified name string 3042 * 3043 * [NS 5] QName ::= (Prefix ':')? LocalPart 3044 * 3045 * [NS 6] Prefix ::= NCName 3046 * 3047 * [NS 7] LocalPart ::= NCName 3048 * 3049 * Returns the local part, and prefix is updated 3050 * to get the Prefix if any. 3051 */ 3052 3053 xmlChar * 3054 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 3055 xmlChar buf[XML_MAX_NAMELEN + 5]; 3056 xmlChar *buffer = NULL; 3057 int len = 0; 3058 int max = XML_MAX_NAMELEN; 3059 xmlChar *ret = NULL; 3060 const xmlChar *cur = name; 3061 int c; 3062 3063 if (prefix == NULL) return(NULL); 3064 *prefix = NULL; 3065 3066 if (cur == NULL) return(NULL); 3067 3068 #ifndef XML_XML_NAMESPACE 3069 /* xml: prefix is not really a namespace */ 3070 if ((cur[0] == 'x') && (cur[1] == 'm') && 3071 (cur[2] == 'l') && (cur[3] == ':')) 3072 return(xmlStrdup(name)); 3073 #endif 3074 3075 /* nasty but well=formed */ 3076 if (cur[0] == ':') 3077 return(xmlStrdup(name)); 3078 3079 c = *cur++; 3080 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 3081 buf[len++] = c; 3082 c = *cur++; 3083 } 3084 if (len >= max) { 3085 /* 3086 * Okay someone managed to make a huge name, so he's ready to pay 3087 * for the processing speed. 
3088 */ 3089 max = len * 2; 3090 3091 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3092 if (buffer == NULL) { 3093 xmlErrMemory(ctxt, NULL); 3094 return(NULL); 3095 } 3096 memcpy(buffer, buf, len); 3097 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 3098 if (len + 10 > max) { 3099 xmlChar *tmp; 3100 3101 max *= 2; 3102 tmp = (xmlChar *) xmlRealloc(buffer, 3103 max * sizeof(xmlChar)); 3104 if (tmp == NULL) { 3105 xmlFree(buffer); 3106 xmlErrMemory(ctxt, NULL); 3107 return(NULL); 3108 } 3109 buffer = tmp; 3110 } 3111 buffer[len++] = c; 3112 c = *cur++; 3113 } 3114 buffer[len] = 0; 3115 } 3116 3117 if ((c == ':') && (*cur == 0)) { 3118 if (buffer != NULL) 3119 xmlFree(buffer); 3120 *prefix = NULL; 3121 return(xmlStrdup(name)); 3122 } 3123 3124 if (buffer == NULL) 3125 ret = xmlStrndup(buf, len); 3126 else { 3127 ret = buffer; 3128 buffer = NULL; 3129 max = XML_MAX_NAMELEN; 3130 } 3131 3132 3133 if (c == ':') { 3134 c = *cur; 3135 *prefix = ret; 3136 if (c == 0) { 3137 return(xmlStrndup(BAD_CAST "", 0)); 3138 } 3139 len = 0; 3140 3141 /* 3142 * Check that the first character is proper to start 3143 * a new name 3144 */ 3145 if (!(((c >= 0x61) && (c <= 0x7A)) || 3146 ((c >= 0x41) && (c <= 0x5A)) || 3147 (c == '_') || (c == ':'))) { 3148 int l; 3149 int first = CUR_SCHAR(cur, l); 3150 3151 if (!IS_LETTER(first) && (first != '_')) { 3152 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 3153 "Name %s is not XML Namespace compliant\n", 3154 name); 3155 } 3156 } 3157 cur++; 3158 3159 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 3160 buf[len++] = c; 3161 c = *cur++; 3162 } 3163 if (len >= max) { 3164 /* 3165 * Okay someone managed to make a huge name, so he's ready to pay 3166 * for the processing speed. 
3167 */ 3168 max = len * 2; 3169 3170 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3171 if (buffer == NULL) { 3172 xmlErrMemory(ctxt, NULL); 3173 return(NULL); 3174 } 3175 memcpy(buffer, buf, len); 3176 while (c != 0) { /* tested bigname2.xml */ 3177 if (len + 10 > max) { 3178 xmlChar *tmp; 3179 3180 max *= 2; 3181 tmp = (xmlChar *) xmlRealloc(buffer, 3182 max * sizeof(xmlChar)); 3183 if (tmp == NULL) { 3184 xmlErrMemory(ctxt, NULL); 3185 xmlFree(buffer); 3186 return(NULL); 3187 } 3188 buffer = tmp; 3189 } 3190 buffer[len++] = c; 3191 c = *cur++; 3192 } 3193 buffer[len] = 0; 3194 } 3195 3196 if (buffer == NULL) 3197 ret = xmlStrndup(buf, len); 3198 else { 3199 ret = buffer; 3200 } 3201 } 3202 3203 return(ret); 3204 } 3205 3206 /************************************************************************ 3207 * * 3208 * The parser itself * 3209 * Relates to http://www.w3.org/TR/REC-xml * 3210 * * 3211 ************************************************************************/ 3212 3213 /************************************************************************ 3214 * * 3215 * Routines to parse Name, NCName and NmToken * 3216 * * 3217 ************************************************************************/ 3218 #ifdef DEBUG 3219 static unsigned long nbParseName = 0; 3220 static unsigned long nbParseNmToken = 0; 3221 static unsigned long nbParseNCName = 0; 3222 static unsigned long nbParseNCNameComplex = 0; 3223 static unsigned long nbParseNameComplex = 0; 3224 static unsigned long nbParseStringName = 0; 3225 #endif 3226 3227 /* 3228 * The two following functions are related to the change of accepted 3229 * characters for Name and NmToken in the Revision 5 of XML-1.0 3230 * They correspond to the modified production [4] and the new production [4a] 3231 * changes in that revision. Also note that the macros used for the 3232 * productions Letter, Digit, CombiningChar and Extender are not needed 3233 * anymore. 
3234 * We still keep compatibility to pre-revision5 parsing semantic if the 3235 * new XML_PARSE_OLD10 option is given to the parser. 3236 */ 3237 static int 3238 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3240 /* 3241 * Use the new checks of production [4] [4a] amd [5] of the 3242 * Update 5 of XML-1.0 3243 */ 3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3245 (((c >= 'a') && (c <= 'z')) || 3246 ((c >= 'A') && (c <= 'Z')) || 3247 (c == '_') || (c == ':') || 3248 ((c >= 0xC0) && (c <= 0xD6)) || 3249 ((c >= 0xD8) && (c <= 0xF6)) || 3250 ((c >= 0xF8) && (c <= 0x2FF)) || 3251 ((c >= 0x370) && (c <= 0x37D)) || 3252 ((c >= 0x37F) && (c <= 0x1FFF)) || 3253 ((c >= 0x200C) && (c <= 0x200D)) || 3254 ((c >= 0x2070) && (c <= 0x218F)) || 3255 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3256 ((c >= 0x3001) && (c <= 0xD7FF)) || 3257 ((c >= 0xF900) && (c <= 0xFDCF)) || 3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3259 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3260 return(1); 3261 } else { 3262 if (IS_LETTER(c) || (c == '_') || (c == ':')) 3263 return(1); 3264 } 3265 return(0); 3266 } 3267 3268 static int 3269 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3271 /* 3272 * Use the new checks of production [4] [4a] amd [5] of the 3273 * Update 5 of XML-1.0 3274 */ 3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3276 (((c >= 'a') && (c <= 'z')) || 3277 ((c >= 'A') && (c <= 'Z')) || 3278 ((c >= '0') && (c <= '9')) || /* !start */ 3279 (c == '_') || (c == ':') || 3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3281 ((c >= 0xC0) && (c <= 0xD6)) || 3282 ((c >= 0xD8) && (c <= 0xF6)) || 3283 ((c >= 0xF8) && (c <= 0x2FF)) || 3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3285 ((c >= 0x370) && (c <= 0x37D)) || 3286 ((c >= 0x37F) && (c <= 0x1FFF)) || 3287 ((c >= 0x200C) && (c <= 0x200D)) || 3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start 
*/ 3289 ((c >= 0x2070) && (c <= 0x218F)) || 3290 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3291 ((c >= 0x3001) && (c <= 0xD7FF)) || 3292 ((c >= 0xF900) && (c <= 0xFDCF)) || 3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3294 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3295 return(1); 3296 } else { 3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3298 (c == '.') || (c == '-') || 3299 (c == '_') || (c == ':') || 3300 (IS_COMBINING(c)) || 3301 (IS_EXTENDER(c))) 3302 return(1); 3303 } 3304 return(0); 3305 } 3306 3307 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3308 int *len, int *alloc, int normalize); 3309 3310 static const xmlChar * 3311 xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3312 int len = 0, l; 3313 int c; 3314 int count = 0; 3315 3316 #ifdef DEBUG 3317 nbParseNameComplex++; 3318 #endif 3319 3320 /* 3321 * Handler for more complex cases 3322 */ 3323 GROW; 3324 if (ctxt->instate == XML_PARSER_EOF) 3325 return(NULL); 3326 c = CUR_CHAR(l); 3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3328 /* 3329 * Use the new checks of production [4] [4a] amd [5] of the 3330 * Update 5 of XML-1.0 3331 */ 3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3333 (!(((c >= 'a') && (c <= 'z')) || 3334 ((c >= 'A') && (c <= 'Z')) || 3335 (c == '_') || (c == ':') || 3336 ((c >= 0xC0) && (c <= 0xD6)) || 3337 ((c >= 0xD8) && (c <= 0xF6)) || 3338 ((c >= 0xF8) && (c <= 0x2FF)) || 3339 ((c >= 0x370) && (c <= 0x37D)) || 3340 ((c >= 0x37F) && (c <= 0x1FFF)) || 3341 ((c >= 0x200C) && (c <= 0x200D)) || 3342 ((c >= 0x2070) && (c <= 0x218F)) || 3343 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3344 ((c >= 0x3001) && (c <= 0xD7FF)) || 3345 ((c >= 0xF900) && (c <= 0xFDCF)) || 3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3348 return(NULL); 3349 } 3350 len += l; 3351 NEXTL(l); 3352 c = CUR_CHAR(l); 3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3354 (((c >= 'a') && (c <= 'z')) || 3355 ((c >= 'A') && (c <= 'Z')) || 3356 
((c >= '0') && (c <= '9')) || /* !start */ 3357 (c == '_') || (c == ':') || 3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3359 ((c >= 0xC0) && (c <= 0xD6)) || 3360 ((c >= 0xD8) && (c <= 0xF6)) || 3361 ((c >= 0xF8) && (c <= 0x2FF)) || 3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3363 ((c >= 0x370) && (c <= 0x37D)) || 3364 ((c >= 0x37F) && (c <= 0x1FFF)) || 3365 ((c >= 0x200C) && (c <= 0x200D)) || 3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3367 ((c >= 0x2070) && (c <= 0x218F)) || 3368 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3369 ((c >= 0x3001) && (c <= 0xD7FF)) || 3370 ((c >= 0xF900) && (c <= 0xFDCF)) || 3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3372 ((c >= 0x10000) && (c <= 0xEFFFF)) 3373 )) { 3374 if (count++ > XML_PARSER_CHUNK_SIZE) { 3375 count = 0; 3376 GROW; 3377 if (ctxt->instate == XML_PARSER_EOF) 3378 return(NULL); 3379 } 3380 len += l; 3381 NEXTL(l); 3382 c = CUR_CHAR(l); 3383 } 3384 } else { 3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3386 (!IS_LETTER(c) && (c != '_') && 3387 (c != ':'))) { 3388 return(NULL); 3389 } 3390 len += l; 3391 NEXTL(l); 3392 c = CUR_CHAR(l); 3393 3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3396 (c == '.') || (c == '-') || 3397 (c == '_') || (c == ':') || 3398 (IS_COMBINING(c)) || 3399 (IS_EXTENDER(c)))) { 3400 if (count++ > XML_PARSER_CHUNK_SIZE) { 3401 count = 0; 3402 GROW; 3403 if (ctxt->instate == XML_PARSER_EOF) 3404 return(NULL); 3405 } 3406 len += l; 3407 NEXTL(l); 3408 c = CUR_CHAR(l); 3409 if (c == 0) { 3410 count = 0; 3411 GROW; 3412 if (ctxt->instate == XML_PARSER_EOF) 3413 return(NULL); 3414 c = CUR_CHAR(l); 3415 } 3416 } 3417 } 3418 if ((len > XML_MAX_NAME_LENGTH) && 3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3421 return(NULL); 3422 } 3423 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3424 
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3425 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3426 } 3427 3428 /** 3429 * xmlParseName: 3430 * @ctxt: an XML parser context 3431 * 3432 * parse an XML name. 3433 * 3434 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3435 * CombiningChar | Extender 3436 * 3437 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3438 * 3439 * [6] Names ::= Name (#x20 Name)* 3440 * 3441 * Returns the Name parsed or NULL 3442 */ 3443 3444 const xmlChar * 3445 xmlParseName(xmlParserCtxtPtr ctxt) { 3446 const xmlChar *in; 3447 const xmlChar *ret; 3448 int count = 0; 3449 3450 GROW; 3451 3452 #ifdef DEBUG 3453 nbParseName++; 3454 #endif 3455 3456 /* 3457 * Accelerator for simple ASCII names 3458 */ 3459 in = ctxt->input->cur; 3460 if (((*in >= 0x61) && (*in <= 0x7A)) || 3461 ((*in >= 0x41) && (*in <= 0x5A)) || 3462 (*in == '_') || (*in == ':')) { 3463 in++; 3464 while (((*in >= 0x61) && (*in <= 0x7A)) || 3465 ((*in >= 0x41) && (*in <= 0x5A)) || 3466 ((*in >= 0x30) && (*in <= 0x39)) || 3467 (*in == '_') || (*in == '-') || 3468 (*in == ':') || (*in == '.')) 3469 in++; 3470 if ((*in > 0) && (*in < 0x80)) { 3471 count = in - ctxt->input->cur; 3472 if ((count > XML_MAX_NAME_LENGTH) && 3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); 3475 return(NULL); 3476 } 3477 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3478 ctxt->input->cur = in; 3479 ctxt->nbChars += count; 3480 ctxt->input->col += count; 3481 if (ret == NULL) 3482 xmlErrMemory(ctxt, NULL); 3483 return(ret); 3484 } 3485 } 3486 /* accelerator for special cases */ 3487 return(xmlParseNameComplex(ctxt)); 3488 } 3489 3490 static const xmlChar * 3491 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3492 int len = 0, l; 3493 int c; 3494 int count = 0; 3495 size_t startPosition = 0; 3496 3497 #ifdef DEBUG 3498 nbParseNCNameComplex++; 3499 #endif 3500 3501 /* 3502 * Handler for 
more complex cases 3503 */ 3504 GROW; 3505 startPosition = CUR_PTR - BASE_PTR; 3506 c = CUR_CHAR(l); 3507 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3508 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3509 return(NULL); 3510 } 3511 3512 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3513 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3514 if (count++ > XML_PARSER_CHUNK_SIZE) { 3515 if ((len > XML_MAX_NAME_LENGTH) && 3516 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3517 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3518 return(NULL); 3519 } 3520 count = 0; 3521 GROW; 3522 if (ctxt->instate == XML_PARSER_EOF) 3523 return(NULL); 3524 } 3525 len += l; 3526 NEXTL(l); 3527 c = CUR_CHAR(l); 3528 if (c == 0) { 3529 count = 0; 3530 /* 3531 * when shrinking to extend the buffer we really need to preserve 3532 * the part of the name we already parsed. Hence rolling back 3533 * by current lenght. 3534 */ 3535 ctxt->input->cur -= l; 3536 GROW; 3537 ctxt->input->cur += l; 3538 if (ctxt->instate == XML_PARSER_EOF) 3539 return(NULL); 3540 c = CUR_CHAR(l); 3541 } 3542 } 3543 if ((len > XML_MAX_NAME_LENGTH) && 3544 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3545 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3546 return(NULL); 3547 } 3548 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); 3549 } 3550 3551 /** 3552 * xmlParseNCName: 3553 * @ctxt: an XML parser context 3554 * @len: length of the string parsed 3555 * 3556 * parse an XML name. 3557 * 3558 * [4NS] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | 3559 * CombiningChar | Extender 3560 * 3561 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3562 * 3563 * Returns the Name parsed or NULL 3564 */ 3565 3566 static const xmlChar * 3567 xmlParseNCName(xmlParserCtxtPtr ctxt) { 3568 const xmlChar *in, *e; 3569 const xmlChar *ret; 3570 int count = 0; 3571 3572 #ifdef DEBUG 3573 nbParseNCName++; 3574 #endif 3575 3576 /* 3577 * Accelerator for simple ASCII names 3578 */ 3579 in = ctxt->input->cur; 3580 e = ctxt->input->end; 3581 if ((((*in >= 0x61) && (*in <= 0x7A)) || 3582 ((*in >= 0x41) && (*in <= 0x5A)) || 3583 (*in == '_')) && (in < e)) { 3584 in++; 3585 while ((((*in >= 0x61) && (*in <= 0x7A)) || 3586 ((*in >= 0x41) && (*in <= 0x5A)) || 3587 ((*in >= 0x30) && (*in <= 0x39)) || 3588 (*in == '_') || (*in == '-') || 3589 (*in == '.')) && (in < e)) 3590 in++; 3591 if (in >= e) 3592 goto complex; 3593 if ((*in > 0) && (*in < 0x80)) { 3594 count = in - ctxt->input->cur; 3595 if ((count > XML_MAX_NAME_LENGTH) && 3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3598 return(NULL); 3599 } 3600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3601 ctxt->input->cur = in; 3602 ctxt->nbChars += count; 3603 ctxt->input->col += count; 3604 if (ret == NULL) { 3605 xmlErrMemory(ctxt, NULL); 3606 } 3607 return(ret); 3608 } 3609 } 3610 complex: 3611 return(xmlParseNCNameComplex(ctxt)); 3612 } 3613 3614 /** 3615 * xmlParseNameAndCompare: 3616 * @ctxt: an XML parser context 3617 * 3618 * parse an XML name and compares for match 3619 * (specialized for endtag parsing) 3620 * 3621 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3622 * and the name for mismatch 3623 */ 3624 3625 static const xmlChar * 3626 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3627 register const xmlChar *cmp = other; 3628 register const xmlChar *in; 3629 const xmlChar *ret; 3630 3631 GROW; 3632 if (ctxt->instate == XML_PARSER_EOF) 3633 return(NULL); 
3634 3635 in = ctxt->input->cur; 3636 while (*in != 0 && *in == *cmp) { 3637 ++in; 3638 ++cmp; 3639 ctxt->input->col++; 3640 } 3641 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3642 /* success */ 3643 ctxt->input->cur = in; 3644 return (const xmlChar*) 1; 3645 } 3646 /* failure (or end of input buffer), check with full function */ 3647 ret = xmlParseName (ctxt); 3648 /* strings coming from the dictionary direct compare possible */ 3649 if (ret == other) { 3650 return (const xmlChar*) 1; 3651 } 3652 return ret; 3653 } 3654 3655 /** 3656 * xmlParseStringName: 3657 * @ctxt: an XML parser context 3658 * @str: a pointer to the string pointer (IN/OUT) 3659 * 3660 * parse an XML name. 3661 * 3662 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3663 * CombiningChar | Extender 3664 * 3665 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3666 * 3667 * [6] Names ::= Name (#x20 Name)* 3668 * 3669 * Returns the Name parsed or NULL. The @str pointer 3670 * is updated to the current location in the string. 3671 */ 3672 3673 static xmlChar * 3674 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3675 xmlChar buf[XML_MAX_NAMELEN + 5]; 3676 const xmlChar *cur = *str; 3677 int len = 0, l; 3678 int c; 3679 3680 #ifdef DEBUG 3681 nbParseStringName++; 3682 #endif 3683 3684 c = CUR_SCHAR(cur, l); 3685 if (!xmlIsNameStartChar(ctxt, c)) { 3686 return(NULL); 3687 } 3688 3689 COPY_BUF(l,buf,len,c); 3690 cur += l; 3691 c = CUR_SCHAR(cur, l); 3692 while (xmlIsNameChar(ctxt, c)) { 3693 COPY_BUF(l,buf,len,c); 3694 cur += l; 3695 c = CUR_SCHAR(cur, l); 3696 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3697 /* 3698 * Okay someone managed to make a huge name, so he's ready to pay 3699 * for the processing speed. 
3700 */ 3701 xmlChar *buffer; 3702 int max = len * 2; 3703 3704 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3705 if (buffer == NULL) { 3706 xmlErrMemory(ctxt, NULL); 3707 return(NULL); 3708 } 3709 memcpy(buffer, buf, len); 3710 while (xmlIsNameChar(ctxt, c)) { 3711 if (len + 10 > max) { 3712 xmlChar *tmp; 3713 3714 if ((len > XML_MAX_NAME_LENGTH) && 3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3717 xmlFree(buffer); 3718 return(NULL); 3719 } 3720 max *= 2; 3721 tmp = (xmlChar *) xmlRealloc(buffer, 3722 max * sizeof(xmlChar)); 3723 if (tmp == NULL) { 3724 xmlErrMemory(ctxt, NULL); 3725 xmlFree(buffer); 3726 return(NULL); 3727 } 3728 buffer = tmp; 3729 } 3730 COPY_BUF(l,buffer,len,c); 3731 cur += l; 3732 c = CUR_SCHAR(cur, l); 3733 } 3734 buffer[len] = 0; 3735 *str = cur; 3736 return(buffer); 3737 } 3738 } 3739 if ((len > XML_MAX_NAME_LENGTH) && 3740 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 3741 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); 3742 return(NULL); 3743 } 3744 *str = cur; 3745 return(xmlStrndup(buf, len)); 3746 } 3747 3748 /** 3749 * xmlParseNmtoken: 3750 * @ctxt: an XML parser context 3751 * 3752 * parse an XML Nmtoken. 
*
 * [7] Nmtoken ::= (NameChar)+
 *
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
 *
 * Returns the Nmtoken parsed or NULL. A non-NULL result is allocated
 * (xmlStrndup() copy or heap buffer) and has to be freed by the caller.
 */

xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    int len = 0, l;
    int c;
    int count = 0;

#ifdef DEBUG
    nbParseNmToken++;
#endif

    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);

    /* short tokens are collected in the stack buffer */
    while (xmlIsNameChar(ctxt, c)) {
        if (count++ > XML_PARSER_CHUNK_SIZE) {
            count = 0;
            GROW;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        if (c == 0) {
            /* a zero character: try to get more input before giving up */
            count = 0;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            c = CUR_CHAR(l);
        }
        if (len >= XML_MAX_NAMELEN) {
            /*
             * Okay someone managed to make a huge token, so he's ready to pay
             * for the processing speed.
             */
            xmlChar *buffer;
            int max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            /* the rest of the token is read straight into the heap buffer */
            while (xmlIsNameChar(ctxt, c)) {
                if (count++ > XML_PARSER_CHUNK_SIZE) {
                    count = 0;
                    GROW;
                    if (ctxt->instate == XML_PARSER_EOF) {
                        xmlFree(buffer);
                        return(NULL);
                    }
                }
                if (len + 10 > max) {
                    xmlChar *tmp;

                    /* the limit is tested on the capacity, when growing */
                    if ((max > XML_MAX_NAME_LENGTH) &&
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
                        xmlFree(buffer);
                        return(NULL);
                    }
                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                COPY_BUF(l,buffer,len,c);
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            buffer[len] = 0;
            return(buffer);
        }
    }
    /* no name character at all: not an Nmtoken */
    if (len == 0)
        return(NULL);
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
        return(NULL);
    }
    return(xmlStrndup(buf, len));
}

/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The raw literal is first copied into a buffer, then scanned for '%' and
 * '&' not followed by a well formed reference, and finally passed to
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF. On success with a
 * non-NULL @orig the raw buffer is handed over through *@orig instead of
 * being freed.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which character closes the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input the literal started in, see the loop condition */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            cur++;
            /* the name is parsed only to validate the reference syntax */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the closing quote was never found: ret stays NULL */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ++ctxt->depth;
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        --ctxt->depth;
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}

/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  the resulting attribute len, may be NULL
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* the opening quote becomes the terminator looked for below */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            /* NOTE(review): this also raises xmlErrMemory() via mem_error */
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    /* keep an escaped '&' when entities are not replaced */
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        --ctxt->depth;
                        if (rep != NULL) {
                            /*
                             * copy the replacement text, turning #xD, #xA
                             * and #x9 into #x20
                             */
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        --ctxt->depth;

                        /* low bit of checked: the expansion contains '<' */
                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * white space: always stored as #x20; when normalizing,
                 * leading blanks and repeated blanks are dropped
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* when normalizing, also drop the trailing blanks */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

    /* mem_error reports an allocation failure, then shares the cleanup */
mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}

/**
 * xmlParseAttValue:
 * @ctxt:  an XML parser context
 *
 * parse a value for an attribute
 * Note: the parser won't do substitution of entities here, this
 * will be handled later in xmlStringGetNodeList
 *
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
 *                   "'" ([^<&'] | Reference)* "'"
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a
single 4269 * #x20 is appended for a "#xD#xA" sequence that is part of an external 4270 * parsed entity or the literal entity value of an internal parsed entity 4271 * - other characters are processed by appending them to the normalized value 4272 * If the declared value is not CDATA, then the XML processor must further 4273 * process the normalized attribute value by discarding any leading and 4274 * trailing space (#x20) characters, and by replacing sequences of space 4275 * (#x20) characters by a single space (#x20) character. 4276 * All attributes for which no declaration has been read should be treated 4277 * by a non-validating parser as if declared CDATA. 4278 * 4279 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 4280 */ 4281 4282 4283 xmlChar * 4284 xmlParseAttValue(xmlParserCtxtPtr ctxt) { 4285 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 4286 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 4287 } 4288 4289 /** 4290 * xmlParseSystemLiteral: 4291 * @ctxt: an XML parser context 4292 * 4293 * parse an XML Literal 4294 * 4295 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 4296 * 4297 * Returns the SystemLiteral parsed or NULL 4298 */ 4299 4300 xmlChar * 4301 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 4302 xmlChar *buf = NULL; 4303 int len = 0; 4304 int size = XML_PARSER_BUFFER_SIZE; 4305 int cur, l; 4306 xmlChar stop; 4307 int state = ctxt->instate; 4308 int count = 0; 4309 4310 SHRINK; 4311 if (RAW == '"') { 4312 NEXT; 4313 stop = '"'; 4314 } else if (RAW == '\'') { 4315 NEXT; 4316 stop = '\''; 4317 } else { 4318 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4319 return(NULL); 4320 } 4321 4322 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4323 if (buf == NULL) { 4324 xmlErrMemory(ctxt, NULL); 4325 return(NULL); 4326 } 4327 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 4328 cur = CUR_CHAR(l); 4329 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 4330 if (len + 5 >= 
size) { 4331 xmlChar *tmp; 4332 4333 if ((size > XML_MAX_NAME_LENGTH) && 4334 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4335 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); 4336 xmlFree(buf); 4337 ctxt->instate = (xmlParserInputState) state; 4338 return(NULL); 4339 } 4340 size *= 2; 4341 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4342 if (tmp == NULL) { 4343 xmlFree(buf); 4344 xmlErrMemory(ctxt, NULL); 4345 ctxt->instate = (xmlParserInputState) state; 4346 return(NULL); 4347 } 4348 buf = tmp; 4349 } 4350 count++; 4351 if (count > 50) { 4352 GROW; 4353 count = 0; 4354 if (ctxt->instate == XML_PARSER_EOF) { 4355 xmlFree(buf); 4356 return(NULL); 4357 } 4358 } 4359 COPY_BUF(l,buf,len,cur); 4360 NEXTL(l); 4361 cur = CUR_CHAR(l); 4362 if (cur == 0) { 4363 GROW; 4364 SHRINK; 4365 cur = CUR_CHAR(l); 4366 } 4367 } 4368 buf[len] = 0; 4369 ctxt->instate = (xmlParserInputState) state; 4370 if (!IS_CHAR(cur)) { 4371 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4372 } else { 4373 NEXT; 4374 } 4375 return(buf); 4376 } 4377 4378 /** 4379 * xmlParsePubidLiteral: 4380 * @ctxt: an XML parser context 4381 * 4382 * parse an XML public literal 4383 * 4384 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 4385 * 4386 * Returns the PubidLiteral parsed or NULL. 
4387 */ 4388 4389 xmlChar * 4390 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 4391 xmlChar *buf = NULL; 4392 int len = 0; 4393 int size = XML_PARSER_BUFFER_SIZE; 4394 xmlChar cur; 4395 xmlChar stop; 4396 int count = 0; 4397 xmlParserInputState oldstate = ctxt->instate; 4398 4399 SHRINK; 4400 if (RAW == '"') { 4401 NEXT; 4402 stop = '"'; 4403 } else if (RAW == '\'') { 4404 NEXT; 4405 stop = '\''; 4406 } else { 4407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 4408 return(NULL); 4409 } 4410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4411 if (buf == NULL) { 4412 xmlErrMemory(ctxt, NULL); 4413 return(NULL); 4414 } 4415 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 4416 cur = CUR; 4417 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 4418 if (len + 1 >= size) { 4419 xmlChar *tmp; 4420 4421 if ((size > XML_MAX_NAME_LENGTH) && 4422 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4423 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); 4424 xmlFree(buf); 4425 return(NULL); 4426 } 4427 size *= 2; 4428 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4429 if (tmp == NULL) { 4430 xmlErrMemory(ctxt, NULL); 4431 xmlFree(buf); 4432 return(NULL); 4433 } 4434 buf = tmp; 4435 } 4436 buf[len++] = cur; 4437 count++; 4438 if (count > 50) { 4439 GROW; 4440 count = 0; 4441 if (ctxt->instate == XML_PARSER_EOF) { 4442 xmlFree(buf); 4443 return(NULL); 4444 } 4445 } 4446 NEXT; 4447 cur = CUR; 4448 if (cur == 0) { 4449 GROW; 4450 SHRINK; 4451 cur = CUR; 4452 } 4453 } 4454 buf[len] = 0; 4455 if (cur != stop) { 4456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4457 } else { 4458 NEXT; 4459 } 4460 ctxt->instate = oldstate; 4461 return(buf); 4462 } 4463 4464 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4465 4466 /* 4467 * used for the test in the inner loop of the char data testing 4468 */ 4469 static const unsigned char test_char_data[256] = { 4470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4471 0x00, 0x09, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4474 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4475 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4476 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4477 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4478 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4479 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4480 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4481 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4482 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4483 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4484 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4485 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4486 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4487 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4488 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4489 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4490 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4491 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4492 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4494 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4495 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4498 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4499 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4500 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4501 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4502 }; 4503 4504 /** 4505 * xmlParseCharData: 4506 * @ctxt: an XML parser context 4507 * @cdata: int indicating whether we are within a CDATA section 4508 * 4509 * parse a CharData section. 4510 * if we are within a CDATA section ']]>' marks an end of section. 
*
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a byte-oriented fast path scans the input directly and
 * hands runs of text to the SAX characters / ignorableWhitespace
 * callbacks. Anything the fast path does not handle (and every call with
 * @cdata != 0) is passed on to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: refreshed after each SAX delivery below and
     * written back to ctxt->input right before falling through to
     * xmlParseCharDataComplex().
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
            /* Skip a leading run of spaces and line feeds. */
get_more_space:
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * Only spaces / line feeds before the next '<': report the
             * run (if any) and return.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* The cursor is advanced before the callback runs. */
                    ctxt->input->cur = in;

                    /*
                     * With distinct handlers areBlanks() decides which
                     * one receives the run; otherwise everything goes to
                     * the characters handler.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

            /*
             * Advance over every byte whose test_char_data[] entry is
             * non-zero; the column is tracked in a local and stored once.
             */
get_more:
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            /*
             * ']' stops the table scan so that "]]>" can be detected:
             * that sequence is an error here, a lone ']' is plain data.
             */
            if (*in == ']') {
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what has been scanned since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                /*
                 * areBlanks() is only consulted when the handlers differ
                 * and the run starts with a blank character.
                 */
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            /*
             * CR immediately followed by LF: the cursor is left on the
             * LF, a new line is counted and the loop condition below is
             * re-evaluated on the byte after the LF. A lone CR is left
             * in place.
             */
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* Markup or an entity reference ends the character data. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /*
             * NOTE(review): SHRINK/GROW are defined outside this view;
             * the cursor is re-read afterwards, presumably because they
             * may move the input buffer -- confirm.
             */
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
            /* Stay on the fast path while the next byte is TAB or 0x20..0x7F. */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Hand over to the generic routine from the last recorded position. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
4664 * @ctxt: an XML parser context 4665 * @cdata: int indicating whether we are within a CDATA section 4666 * 4667 * parse a CharData section.this is the fallback function 4668 * of xmlParseCharData() when the parsing requires handling 4669 * of non-ASCII characters. 4670 */ 4671 static void 4672 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4673 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4674 int nbchar = 0; 4675 int cur, l; 4676 int count = 0; 4677 4678 SHRINK; 4679 GROW; 4680 cur = CUR_CHAR(l); 4681 while ((cur != '<') && /* checked */ 4682 (cur != '&') && 4683 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4684 if ((cur == ']') && (NXT(1) == ']') && 4685 (NXT(2) == '>')) { 4686 if (cdata) break; 4687 else { 4688 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4689 } 4690 } 4691 COPY_BUF(l,buf,nbchar,cur); 4692 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4693 buf[nbchar] = 0; 4694 4695 /* 4696 * OK the segment is to be consumed as chars. 4697 */ 4698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4699 if (areBlanks(ctxt, buf, nbchar, 0)) { 4700 if (ctxt->sax->ignorableWhitespace != NULL) 4701 ctxt->sax->ignorableWhitespace(ctxt->userData, 4702 buf, nbchar); 4703 } else { 4704 if (ctxt->sax->characters != NULL) 4705 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4706 if ((ctxt->sax->characters != 4707 ctxt->sax->ignorableWhitespace) && 4708 (*ctxt->space == -1)) 4709 *ctxt->space = -2; 4710 } 4711 } 4712 nbchar = 0; 4713 /* something really bad happened in the SAX callback */ 4714 if (ctxt->instate != XML_PARSER_CONTENT) 4715 return; 4716 } 4717 count++; 4718 if (count > 50) { 4719 GROW; 4720 count = 0; 4721 if (ctxt->instate == XML_PARSER_EOF) 4722 return; 4723 } 4724 NEXTL(l); 4725 cur = CUR_CHAR(l); 4726 } 4727 if (nbchar != 0) { 4728 buf[nbchar] = 0; 4729 /* 4730 * OK the segment is to be consumed as chars. 
4731 */ 4732 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4733 if (areBlanks(ctxt, buf, nbchar, 0)) { 4734 if (ctxt->sax->ignorableWhitespace != NULL) 4735 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4736 } else { 4737 if (ctxt->sax->characters != NULL) 4738 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4739 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4740 (*ctxt->space == -1)) 4741 *ctxt->space = -2; 4742 } 4743 } 4744 } 4745 if ((cur != 0) && (!IS_CHAR(cur))) { 4746 /* Generate the error and skip the offending character */ 4747 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4748 "PCDATA invalid Char value %d\n", 4749 cur); 4750 NEXTL(l); 4751 } 4752 } 4753 4754 /** 4755 * xmlParseExternalID: 4756 * @ctxt: an XML parser context 4757 * @publicID: a xmlChar** receiving PubidLiteral 4758 * @strict: indicate whether we should restrict parsing to only 4759 * production [75], see NOTE below 4760 * 4761 * Parse an External ID or a Public ID 4762 * 4763 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4764 * 'PUBLIC' S PubidLiteral S SystemLiteral 4765 * 4766 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4767 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4768 * 4769 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4770 * 4771 * Returns the function returns SystemLiteral and in the second 4772 * case publicID receives PubidLiteral, is strict is off 4773 * it is possible to return NULL and have publicID set. 
4774 */ 4775 4776 xmlChar * 4777 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4778 xmlChar *URI = NULL; 4779 4780 SHRINK; 4781 4782 *publicID = NULL; 4783 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4784 SKIP(6); 4785 if (!IS_BLANK_CH(CUR)) { 4786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4787 "Space required after 'SYSTEM'\n"); 4788 } 4789 SKIP_BLANKS; 4790 URI = xmlParseSystemLiteral(ctxt); 4791 if (URI == NULL) { 4792 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4793 } 4794 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4795 SKIP(6); 4796 if (!IS_BLANK_CH(CUR)) { 4797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4798 "Space required after 'PUBLIC'\n"); 4799 } 4800 SKIP_BLANKS; 4801 *publicID = xmlParsePubidLiteral(ctxt); 4802 if (*publicID == NULL) { 4803 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4804 } 4805 if (strict) { 4806 /* 4807 * We don't handle [83] so "S SystemLiteral" is required. 4808 */ 4809 if (!IS_BLANK_CH(CUR)) { 4810 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4811 "Space required after the Public Identifier\n"); 4812 } 4813 } else { 4814 /* 4815 * We handle [83] so we return immediately, if 4816 * "S SystemLiteral" is not detected. From a purely parsing 4817 * point of view that's a nice mess. 4818 */ 4819 const xmlChar *ptr; 4820 GROW; 4821 4822 ptr = CUR_PTR; 4823 if (!IS_BLANK_CH(*ptr)) return(NULL); 4824 4825 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4826 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4827 } 4828 SKIP_BLANKS; 4829 URI = xmlParseSystemLiteral(ctxt); 4830 if (URI == NULL) { 4831 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4832 } 4833 } 4834 return(URI); 4835 } 4836 4837 /** 4838 * xmlParseCommentComplex: 4839 * @ctxt: an XML parser context 4840 * @buf: the already parsed part of the buffer 4841 * @len: number of bytes filles in the buffer 4842 * @size: allocated size of the buffer 4843 * 4844 * Skip an XML (SGML) comment <!-- .... 
--> 4845 * The spec says that "For compatibility, the string "--" (double-hyphen) 4846 * must not occur within comments. " 4847 * This is the slow routine in case the accelerator for ascii didn't work 4848 * 4849 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4850 */ 4851 static void 4852 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, 4853 size_t len, size_t size) { 4854 int q, ql; 4855 int r, rl; 4856 int cur, l; 4857 size_t count = 0; 4858 int inputid; 4859 4860 inputid = ctxt->input->id; 4861 4862 if (buf == NULL) { 4863 len = 0; 4864 size = XML_PARSER_BUFFER_SIZE; 4865 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4866 if (buf == NULL) { 4867 xmlErrMemory(ctxt, NULL); 4868 return; 4869 } 4870 } 4871 GROW; /* Assure there's enough input data */ 4872 q = CUR_CHAR(ql); 4873 if (q == 0) 4874 goto not_terminated; 4875 if (!IS_CHAR(q)) { 4876 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4877 "xmlParseComment: invalid xmlChar value %d\n", 4878 q); 4879 xmlFree (buf); 4880 return; 4881 } 4882 NEXTL(ql); 4883 r = CUR_CHAR(rl); 4884 if (r == 0) 4885 goto not_terminated; 4886 if (!IS_CHAR(r)) { 4887 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4888 "xmlParseComment: invalid xmlChar value %d\n", 4889 q); 4890 xmlFree (buf); 4891 return; 4892 } 4893 NEXTL(rl); 4894 cur = CUR_CHAR(l); 4895 if (cur == 0) 4896 goto not_terminated; 4897 while (IS_CHAR(cur) && /* checked */ 4898 ((cur != '>') || 4899 (r != '-') || (q != '-'))) { 4900 if ((r == '-') && (q == '-')) { 4901 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4902 } 4903 if ((len > XML_MAX_TEXT_LENGTH) && 4904 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 4905 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4906 "Comment too big found", NULL); 4907 xmlFree (buf); 4908 return; 4909 } 4910 if (len + 5 >= size) { 4911 xmlChar *new_buf; 4912 size_t new_size; 4913 4914 new_size = size * 2; 4915 new_buf = (xmlChar *) xmlRealloc(buf, new_size); 4916 if (new_buf == NULL) { 
4917 xmlFree (buf); 4918 xmlErrMemory(ctxt, NULL); 4919 return; 4920 } 4921 buf = new_buf; 4922 size = new_size; 4923 } 4924 COPY_BUF(ql,buf,len,q); 4925 q = r; 4926 ql = rl; 4927 r = cur; 4928 rl = l; 4929 4930 count++; 4931 if (count > 50) { 4932 GROW; 4933 count = 0; 4934 if (ctxt->instate == XML_PARSER_EOF) { 4935 xmlFree(buf); 4936 return; 4937 } 4938 } 4939 NEXTL(l); 4940 cur = CUR_CHAR(l); 4941 if (cur == 0) { 4942 SHRINK; 4943 GROW; 4944 cur = CUR_CHAR(l); 4945 } 4946 } 4947 buf[len] = 0; 4948 if (cur == 0) { 4949 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4950 "Comment not terminated \n<!--%.50s\n", buf); 4951 } else if (!IS_CHAR(cur)) { 4952 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4953 "xmlParseComment: invalid xmlChar value %d\n", 4954 cur); 4955 } else { 4956 if (inputid != ctxt->input->id) { 4957 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4958 "Comment doesn't start and stop in the same entity\n"); 4959 } 4960 NEXT; 4961 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4962 (!ctxt->disableSAX)) 4963 ctxt->sax->comment(ctxt->userData, buf); 4964 } 4965 xmlFree(buf); 4966 return; 4967 not_terminated: 4968 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4969 "Comment not terminated\n", NULL); 4970 xmlFree(buf); 4971 return; 4972 } 4973 4974 /** 4975 * xmlParseComment: 4976 * @ctxt: an XML parser context 4977 * 4978 * Skip an XML (SGML) comment <!-- .... --> 4979 * The spec says that "For compatibility, the string "--" (double-hyphen) 4980 * must not occur within comments. " 4981 * 4982 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4983 */ 4984 void 4985 xmlParseComment(xmlParserCtxtPtr ctxt) { 4986 xmlChar *buf = NULL; 4987 size_t size = XML_PARSER_BUFFER_SIZE; 4988 size_t len = 0; 4989 xmlParserInputState state; 4990 const xmlChar *in; 4991 size_t nbchar = 0; 4992 int ccol; 4993 int inputid; 4994 4995 /* 4996 * Check that there is a comment right here. 
4997 */ 4998 if ((RAW != '<') || (NXT(1) != '!') || 4999 (NXT(2) != '-') || (NXT(3) != '-')) return; 5000 state = ctxt->instate; 5001 ctxt->instate = XML_PARSER_COMMENT; 5002 inputid = ctxt->input->id; 5003 SKIP(4); 5004 SHRINK; 5005 GROW; 5006 5007 /* 5008 * Accelerated common case where input don't need to be 5009 * modified before passing it to the handler. 5010 */ 5011 in = ctxt->input->cur; 5012 do { 5013 if (*in == 0xA) { 5014 do { 5015 ctxt->input->line++; ctxt->input->col = 1; 5016 in++; 5017 } while (*in == 0xA); 5018 } 5019 get_more: 5020 ccol = ctxt->input->col; 5021 while (((*in > '-') && (*in <= 0x7F)) || 5022 ((*in >= 0x20) && (*in < '-')) || 5023 (*in == 0x09)) { 5024 in++; 5025 ccol++; 5026 } 5027 ctxt->input->col = ccol; 5028 if (*in == 0xA) { 5029 do { 5030 ctxt->input->line++; ctxt->input->col = 1; 5031 in++; 5032 } while (*in == 0xA); 5033 goto get_more; 5034 } 5035 nbchar = in - ctxt->input->cur; 5036 /* 5037 * save current set of data 5038 */ 5039 if (nbchar > 0) { 5040 if ((ctxt->sax != NULL) && 5041 (ctxt->sax->comment != NULL)) { 5042 if (buf == NULL) { 5043 if ((*in == '-') && (in[1] == '-')) 5044 size = nbchar + 1; 5045 else 5046 size = XML_PARSER_BUFFER_SIZE + nbchar; 5047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5048 if (buf == NULL) { 5049 xmlErrMemory(ctxt, NULL); 5050 ctxt->instate = state; 5051 return; 5052 } 5053 len = 0; 5054 } else if (len + nbchar + 1 >= size) { 5055 xmlChar *new_buf; 5056 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 5057 new_buf = (xmlChar *) xmlRealloc(buf, 5058 size * sizeof(xmlChar)); 5059 if (new_buf == NULL) { 5060 xmlFree (buf); 5061 xmlErrMemory(ctxt, NULL); 5062 ctxt->instate = state; 5063 return; 5064 } 5065 buf = new_buf; 5066 } 5067 memcpy(&buf[len], ctxt->input->cur, nbchar); 5068 len += nbchar; 5069 buf[len] = 0; 5070 } 5071 } 5072 if ((len > XML_MAX_TEXT_LENGTH) && 5073 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 5075 
"Comment too big found", NULL); 5076 xmlFree (buf); 5077 return; 5078 } 5079 ctxt->input->cur = in; 5080 if (*in == 0xA) { 5081 in++; 5082 ctxt->input->line++; ctxt->input->col = 1; 5083 } 5084 if (*in == 0xD) { 5085 in++; 5086 if (*in == 0xA) { 5087 ctxt->input->cur = in; 5088 in++; 5089 ctxt->input->line++; ctxt->input->col = 1; 5090 continue; /* while */ 5091 } 5092 in--; 5093 } 5094 SHRINK; 5095 GROW; 5096 if (ctxt->instate == XML_PARSER_EOF) { 5097 xmlFree(buf); 5098 return; 5099 } 5100 in = ctxt->input->cur; 5101 if (*in == '-') { 5102 if (in[1] == '-') { 5103 if (in[2] == '>') { 5104 if (ctxt->input->id != inputid) { 5105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5106 "comment doesn't start and stop in the same entity\n"); 5107 } 5108 SKIP(3); 5109 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 5110 (!ctxt->disableSAX)) { 5111 if (buf != NULL) 5112 ctxt->sax->comment(ctxt->userData, buf); 5113 else 5114 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 5115 } 5116 if (buf != NULL) 5117 xmlFree(buf); 5118 if (ctxt->instate != XML_PARSER_EOF) 5119 ctxt->instate = state; 5120 return; 5121 } 5122 if (buf != NULL) { 5123 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5124 "Double hyphen within comment: " 5125 "<!--%.50s\n", 5126 buf); 5127 } else 5128 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, 5129 "Double hyphen within comment\n", NULL); 5130 in++; 5131 ctxt->input->col++; 5132 } 5133 in++; 5134 ctxt->input->col++; 5135 goto get_more; 5136 } 5137 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 5138 xmlParseCommentComplex(ctxt, buf, len, size); 5139 ctxt->instate = state; 5140 return; 5141 } 5142 5143 5144 /** 5145 * xmlParsePITarget: 5146 * @ctxt: an XML parser context 5147 * 5148 * parse the name of a PI 5149 * 5150 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 5151 * 5152 * Returns the PITarget name or NULL 5153 */ 5154 5155 const xmlChar * 5156 xmlParsePITarget(xmlParserCtxtPtr ctxt) { 5157 const 
xmlChar *name; 5158 5159 name = xmlParseName(ctxt); 5160 if ((name != NULL) && 5161 ((name[0] == 'x') || (name[0] == 'X')) && 5162 ((name[1] == 'm') || (name[1] == 'M')) && 5163 ((name[2] == 'l') || (name[2] == 'L'))) { 5164 int i; 5165 if ((name[0] == 'x') && (name[1] == 'm') && 5166 (name[2] == 'l') && (name[3] == 0)) { 5167 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5168 "XML declaration allowed only at the start of the document\n"); 5169 return(name); 5170 } else if (name[3] == 0) { 5171 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 5172 return(name); 5173 } 5174 for (i = 0;;i++) { 5175 if (xmlW3CPIs[i] == NULL) break; 5176 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 5177 return(name); 5178 } 5179 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 5180 "xmlParsePITarget: invalid name prefix 'xml'\n", 5181 NULL, NULL); 5182 } 5183 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 5184 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5185 "colons are forbidden from PI names '%s'\n", name, NULL, NULL); 5186 } 5187 return(name); 5188 } 5189 5190 #ifdef LIBXML_CATALOG_ENABLED 5191 /** 5192 * xmlParseCatalogPI: 5193 * @ctxt: an XML parser context 5194 * @catalog: the PI value string 5195 * 5196 * parse an XML Catalog Processing Instruction. 
5197 * 5198 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 5199 * 5200 * Occurs only if allowed by the user and if happening in the Misc 5201 * part of the document before any doctype informations 5202 * This will add the given catalog to the parsing context in order 5203 * to be used if there is a resolution need further down in the document 5204 */ 5205 5206 static void 5207 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 5208 xmlChar *URL = NULL; 5209 const xmlChar *tmp, *base; 5210 xmlChar marker; 5211 5212 tmp = catalog; 5213 while (IS_BLANK_CH(*tmp)) tmp++; 5214 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 5215 goto error; 5216 tmp += 7; 5217 while (IS_BLANK_CH(*tmp)) tmp++; 5218 if (*tmp != '=') { 5219 return; 5220 } 5221 tmp++; 5222 while (IS_BLANK_CH(*tmp)) tmp++; 5223 marker = *tmp; 5224 if ((marker != '\'') && (marker != '"')) 5225 goto error; 5226 tmp++; 5227 base = tmp; 5228 while ((*tmp != 0) && (*tmp != marker)) tmp++; 5229 if (*tmp == 0) 5230 goto error; 5231 URL = xmlStrndup(base, tmp - base); 5232 tmp++; 5233 while (IS_BLANK_CH(*tmp)) tmp++; 5234 if (*tmp != 0) 5235 goto error; 5236 5237 if (URL != NULL) { 5238 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 5239 xmlFree(URL); 5240 } 5241 return; 5242 5243 error: 5244 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 5245 "Catalog PI syntax error: %s\n", 5246 catalog, NULL); 5247 if (URL != NULL) 5248 xmlFree(URL); 5249 } 5250 #endif 5251 5252 /** 5253 * xmlParsePI: 5254 * @ctxt: an XML parser context 5255 * 5256 * parse an XML Processing Instruction. 5257 * 5258 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 5259 * 5260 * The processing is transfered to SAX once parsed. 
5261 */ 5262 5263 void 5264 xmlParsePI(xmlParserCtxtPtr ctxt) { 5265 xmlChar *buf = NULL; 5266 size_t len = 0; 5267 size_t size = XML_PARSER_BUFFER_SIZE; 5268 int cur, l; 5269 const xmlChar *target; 5270 xmlParserInputState state; 5271 int count = 0; 5272 5273 if ((RAW == '<') && (NXT(1) == '?')) { 5274 xmlParserInputPtr input = ctxt->input; 5275 state = ctxt->instate; 5276 ctxt->instate = XML_PARSER_PI; 5277 /* 5278 * this is a Processing Instruction. 5279 */ 5280 SKIP(2); 5281 SHRINK; 5282 5283 /* 5284 * Parse the target name and check for special support like 5285 * namespace. 5286 */ 5287 target = xmlParsePITarget(ctxt); 5288 if (target != NULL) { 5289 if ((RAW == '?') && (NXT(1) == '>')) { 5290 if (input != ctxt->input) { 5291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5292 "PI declaration doesn't start and stop in the same entity\n"); 5293 } 5294 SKIP(2); 5295 5296 /* 5297 * SAX: PI detected. 5298 */ 5299 if ((ctxt->sax) && (!ctxt->disableSAX) && 5300 (ctxt->sax->processingInstruction != NULL)) 5301 ctxt->sax->processingInstruction(ctxt->userData, 5302 target, NULL); 5303 if (ctxt->instate != XML_PARSER_EOF) 5304 ctxt->instate = state; 5305 return; 5306 } 5307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 5308 if (buf == NULL) { 5309 xmlErrMemory(ctxt, NULL); 5310 ctxt->instate = state; 5311 return; 5312 } 5313 cur = CUR; 5314 if (!IS_BLANK(cur)) { 5315 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 5316 "ParsePI: PI %s space expected\n", target); 5317 } 5318 SKIP_BLANKS; 5319 cur = CUR_CHAR(l); 5320 while (IS_CHAR(cur) && /* checked */ 5321 ((cur != '?') || (NXT(1) != '>'))) { 5322 if (len + 5 >= size) { 5323 xmlChar *tmp; 5324 size_t new_size = size * 2; 5325 tmp = (xmlChar *) xmlRealloc(buf, new_size); 5326 if (tmp == NULL) { 5327 xmlErrMemory(ctxt, NULL); 5328 xmlFree(buf); 5329 ctxt->instate = state; 5330 return; 5331 } 5332 buf = tmp; 5333 size = new_size; 5334 } 5335 count++; 5336 if (count > 50) { 5337 GROW; 5338 if (ctxt->instate == 
XML_PARSER_EOF) { 5339 xmlFree(buf); 5340 return; 5341 } 5342 count = 0; 5343 if ((len > XML_MAX_TEXT_LENGTH) && 5344 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5346 "PI %s too big found", target); 5347 xmlFree(buf); 5348 ctxt->instate = state; 5349 return; 5350 } 5351 } 5352 COPY_BUF(l,buf,len,cur); 5353 NEXTL(l); 5354 cur = CUR_CHAR(l); 5355 if (cur == 0) { 5356 SHRINK; 5357 GROW; 5358 cur = CUR_CHAR(l); 5359 } 5360 } 5361 if ((len > XML_MAX_TEXT_LENGTH) && 5362 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 5363 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5364 "PI %s too big found", target); 5365 xmlFree(buf); 5366 ctxt->instate = state; 5367 return; 5368 } 5369 buf[len] = 0; 5370 if (cur != '?') { 5371 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 5372 "ParsePI: PI %s never end ...\n", target); 5373 } else { 5374 if (input != ctxt->input) { 5375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5376 "PI declaration doesn't start and stop in the same entity\n"); 5377 } 5378 SKIP(2); 5379 5380 #ifdef LIBXML_CATALOG_ENABLED 5381 if (((state == XML_PARSER_MISC) || 5382 (state == XML_PARSER_START)) && 5383 (xmlStrEqual(target, XML_CATALOG_PI))) { 5384 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 5385 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 5386 (allow == XML_CATA_ALLOW_ALL)) 5387 xmlParseCatalogPI(ctxt, buf); 5388 } 5389 #endif 5390 5391 5392 /* 5393 * SAX: PI detected. 
5394 */ 5395 if ((ctxt->sax) && (!ctxt->disableSAX) && 5396 (ctxt->sax->processingInstruction != NULL)) 5397 ctxt->sax->processingInstruction(ctxt->userData, 5398 target, buf); 5399 } 5400 xmlFree(buf); 5401 } else { 5402 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 5403 } 5404 if (ctxt->instate != XML_PARSER_EOF) 5405 ctxt->instate = state; 5406 } 5407 } 5408 5409 /** 5410 * xmlParseNotationDecl: 5411 * @ctxt: an XML parser context 5412 * 5413 * parse a notation declaration 5414 * 5415 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 5416 * 5417 * Hence there is actually 3 choices: 5418 * 'PUBLIC' S PubidLiteral 5419 * 'PUBLIC' S PubidLiteral S SystemLiteral 5420 * and 'SYSTEM' S SystemLiteral 5421 * 5422 * See the NOTE on xmlParseExternalID(). 5423 */ 5424 5425 void 5426 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 5427 const xmlChar *name; 5428 xmlChar *Pubid; 5429 xmlChar *Systemid; 5430 5431 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5432 xmlParserInputPtr input = ctxt->input; 5433 SHRINK; 5434 SKIP(10); 5435 if (!IS_BLANK_CH(CUR)) { 5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5437 "Space required after '<!NOTATION'\n"); 5438 return; 5439 } 5440 SKIP_BLANKS; 5441 5442 name = xmlParseName(ctxt); 5443 if (name == NULL) { 5444 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5445 return; 5446 } 5447 if (!IS_BLANK_CH(CUR)) { 5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5449 "Space required after the NOTATION name'\n"); 5450 return; 5451 } 5452 if (xmlStrchr(name, ':') != NULL) { 5453 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5454 "colons are forbidden from notation names '%s'\n", 5455 name, NULL, NULL); 5456 } 5457 SKIP_BLANKS; 5458 5459 /* 5460 * Parse the IDs. 
5461 */ 5462 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 5463 SKIP_BLANKS; 5464 5465 if (RAW == '>') { 5466 if (input != ctxt->input) { 5467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5468 "Notation declaration doesn't start and stop in the same entity\n"); 5469 } 5470 NEXT; 5471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5472 (ctxt->sax->notationDecl != NULL)) 5473 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 5474 } else { 5475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5476 } 5477 if (Systemid != NULL) xmlFree(Systemid); 5478 if (Pubid != NULL) xmlFree(Pubid); 5479 } 5480 } 5481 5482 /** 5483 * xmlParseEntityDecl: 5484 * @ctxt: an XML parser context 5485 * 5486 * parse <!ENTITY declarations 5487 * 5488 * [70] EntityDecl ::= GEDecl | PEDecl 5489 * 5490 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 5491 * 5492 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 5493 * 5494 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 5495 * 5496 * [74] PEDef ::= EntityValue | ExternalID 5497 * 5498 * [76] NDataDecl ::= S 'NDATA' S Name 5499 * 5500 * [ VC: Notation Declared ] 5501 * The Name must match the declared name of a notation. 
5502 */ 5503 5504 void 5505 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5506 const xmlChar *name = NULL; 5507 xmlChar *value = NULL; 5508 xmlChar *URI = NULL, *literal = NULL; 5509 const xmlChar *ndata = NULL; 5510 int isParameter = 0; 5511 xmlChar *orig = NULL; 5512 int skipped; 5513 5514 /* GROW; done in the caller */ 5515 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5516 xmlParserInputPtr input = ctxt->input; 5517 SHRINK; 5518 SKIP(8); 5519 skipped = SKIP_BLANKS; 5520 if (skipped == 0) { 5521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5522 "Space required after '<!ENTITY'\n"); 5523 } 5524 5525 if (RAW == '%') { 5526 NEXT; 5527 skipped = SKIP_BLANKS; 5528 if (skipped == 0) { 5529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5530 "Space required after '%%'\n"); 5531 } 5532 isParameter = 1; 5533 } 5534 5535 name = xmlParseName(ctxt); 5536 if (name == NULL) { 5537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5538 "xmlParseEntityDecl: no name\n"); 5539 return; 5540 } 5541 if (xmlStrchr(name, ':') != NULL) { 5542 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5543 "colons are forbidden from entities names '%s'\n", 5544 name, NULL, NULL); 5545 } 5546 skipped = SKIP_BLANKS; 5547 if (skipped == 0) { 5548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5549 "Space required after the entity name\n"); 5550 } 5551 5552 ctxt->instate = XML_PARSER_ENTITY_DECL; 5553 /* 5554 * handle the various case of definitions... 
5555 */ 5556 if (isParameter) { 5557 if ((RAW == '"') || (RAW == '\'')) { 5558 value = xmlParseEntityValue(ctxt, &orig); 5559 if (value) { 5560 if ((ctxt->sax != NULL) && 5561 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5562 ctxt->sax->entityDecl(ctxt->userData, name, 5563 XML_INTERNAL_PARAMETER_ENTITY, 5564 NULL, NULL, value); 5565 } 5566 } else { 5567 URI = xmlParseExternalID(ctxt, &literal, 1); 5568 if ((URI == NULL) && (literal == NULL)) { 5569 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5570 } 5571 if (URI) { 5572 xmlURIPtr uri; 5573 5574 uri = xmlParseURI((const char *) URI); 5575 if (uri == NULL) { 5576 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5577 "Invalid URI: %s\n", URI); 5578 /* 5579 * This really ought to be a well formedness error 5580 * but the XML Core WG decided otherwise c.f. issue 5581 * E26 of the XML erratas. 5582 */ 5583 } else { 5584 if (uri->fragment != NULL) { 5585 /* 5586 * Okay this is foolish to block those but not 5587 * invalid URIs. 5588 */ 5589 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5590 } else { 5591 if ((ctxt->sax != NULL) && 5592 (!ctxt->disableSAX) && 5593 (ctxt->sax->entityDecl != NULL)) 5594 ctxt->sax->entityDecl(ctxt->userData, name, 5595 XML_EXTERNAL_PARAMETER_ENTITY, 5596 literal, URI, NULL); 5597 } 5598 xmlFreeURI(uri); 5599 } 5600 } 5601 } 5602 } else { 5603 if ((RAW == '"') || (RAW == '\'')) { 5604 value = xmlParseEntityValue(ctxt, &orig); 5605 if ((ctxt->sax != NULL) && 5606 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5607 ctxt->sax->entityDecl(ctxt->userData, name, 5608 XML_INTERNAL_GENERAL_ENTITY, 5609 NULL, NULL, value); 5610 /* 5611 * For expat compatibility in SAX mode. 
5612 */ 5613 if ((ctxt->myDoc == NULL) || 5614 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5615 if (ctxt->myDoc == NULL) { 5616 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5617 if (ctxt->myDoc == NULL) { 5618 xmlErrMemory(ctxt, "New Doc failed"); 5619 return; 5620 } 5621 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5622 } 5623 if (ctxt->myDoc->intSubset == NULL) 5624 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5625 BAD_CAST "fake", NULL, NULL); 5626 5627 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5628 NULL, NULL, value); 5629 } 5630 } else { 5631 URI = xmlParseExternalID(ctxt, &literal, 1); 5632 if ((URI == NULL) && (literal == NULL)) { 5633 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5634 } 5635 if (URI) { 5636 xmlURIPtr uri; 5637 5638 uri = xmlParseURI((const char *)URI); 5639 if (uri == NULL) { 5640 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5641 "Invalid URI: %s\n", URI); 5642 /* 5643 * This really ought to be a well formedness error 5644 * but the XML Core WG decided otherwise c.f. issue 5645 * E26 of the XML erratas. 5646 */ 5647 } else { 5648 if (uri->fragment != NULL) { 5649 /* 5650 * Okay this is foolish to block those but not 5651 * invalid URIs. 
5652 */ 5653 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5654 } 5655 xmlFreeURI(uri); 5656 } 5657 } 5658 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5660 "Space required before 'NDATA'\n"); 5661 } 5662 SKIP_BLANKS; 5663 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5664 SKIP(5); 5665 if (!IS_BLANK_CH(CUR)) { 5666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5667 "Space required after 'NDATA'\n"); 5668 } 5669 SKIP_BLANKS; 5670 ndata = xmlParseName(ctxt); 5671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5672 (ctxt->sax->unparsedEntityDecl != NULL)) 5673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5674 literal, URI, ndata); 5675 } else { 5676 if ((ctxt->sax != NULL) && 5677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5678 ctxt->sax->entityDecl(ctxt->userData, name, 5679 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5680 literal, URI, NULL); 5681 /* 5682 * For expat compatibility in SAX mode. 5683 * assuming the entity repalcement was asked for 5684 */ 5685 if ((ctxt->replaceEntities != 0) && 5686 ((ctxt->myDoc == NULL) || 5687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5688 if (ctxt->myDoc == NULL) { 5689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5690 if (ctxt->myDoc == NULL) { 5691 xmlErrMemory(ctxt, "New Doc failed"); 5692 return; 5693 } 5694 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5695 } 5696 5697 if (ctxt->myDoc->intSubset == NULL) 5698 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5699 BAD_CAST "fake", NULL, NULL); 5700 xmlSAX2EntityDecl(ctxt, name, 5701 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5702 literal, URI, NULL); 5703 } 5704 } 5705 } 5706 } 5707 if (ctxt->instate == XML_PARSER_EOF) 5708 return; 5709 SKIP_BLANKS; 5710 if (RAW != '>') { 5711 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5712 "xmlParseEntityDecl: entity %s not terminated\n", name); 5713 xmlHaltParser(ctxt); 5714 } else { 5715 if (input != ctxt->input) { 5716 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 
5717 "Entity declaration doesn't start and stop in the same entity\n"); 5718 } 5719 NEXT; 5720 } 5721 if (orig != NULL) { 5722 /* 5723 * Ugly mechanism to save the raw entity value. 5724 */ 5725 xmlEntityPtr cur = NULL; 5726 5727 if (isParameter) { 5728 if ((ctxt->sax != NULL) && 5729 (ctxt->sax->getParameterEntity != NULL)) 5730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5731 } else { 5732 if ((ctxt->sax != NULL) && 5733 (ctxt->sax->getEntity != NULL)) 5734 cur = ctxt->sax->getEntity(ctxt->userData, name); 5735 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5736 cur = xmlSAX2GetEntity(ctxt, name); 5737 } 5738 } 5739 if (cur != NULL) { 5740 if (cur->orig != NULL) 5741 xmlFree(orig); 5742 else 5743 cur->orig = orig; 5744 } else 5745 xmlFree(orig); 5746 } 5747 if (value != NULL) xmlFree(value); 5748 if (URI != NULL) xmlFree(URI); 5749 if (literal != NULL) xmlFree(literal); 5750 } 5751 } 5752 5753 /** 5754 * xmlParseDefaultDecl: 5755 * @ctxt: an XML parser context 5756 * @value: Receive a possible fixed default value for the attribute 5757 * 5758 * Parse an attribute default declaration 5759 * 5760 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5761 * 5762 * [ VC: Required Attribute ] 5763 * if the default declaration is the keyword #REQUIRED, then the 5764 * attribute must be specified for all elements of the type in the 5765 * attribute-list declaration. 5766 * 5767 * [ VC: Attribute Default Legal ] 5768 * The declared default value must meet the lexical constraints of 5769 * the declared attribute type c.f. xmlValidateAttributeDecl() 5770 * 5771 * [ VC: Fixed Attribute Default ] 5772 * if an attribute has a default value declared with the #FIXED 5773 * keyword, instances of that attribute must match the default value. 5774 * 5775 * [ WFC: No < in Attribute Values ] 5776 * handled in xmlParseAttValue() 5777 * 5778 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5779 * or XML_ATTRIBUTE_FIXED. 
5780 */ 5781 5782 int 5783 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5784 int val; 5785 xmlChar *ret; 5786 5787 *value = NULL; 5788 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5789 SKIP(9); 5790 return(XML_ATTRIBUTE_REQUIRED); 5791 } 5792 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5793 SKIP(8); 5794 return(XML_ATTRIBUTE_IMPLIED); 5795 } 5796 val = XML_ATTRIBUTE_NONE; 5797 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5798 SKIP(6); 5799 val = XML_ATTRIBUTE_FIXED; 5800 if (!IS_BLANK_CH(CUR)) { 5801 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5802 "Space required after '#FIXED'\n"); 5803 } 5804 SKIP_BLANKS; 5805 } 5806 ret = xmlParseAttValue(ctxt); 5807 ctxt->instate = XML_PARSER_DTD; 5808 if (ret == NULL) { 5809 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5810 "Attribute default value declaration error\n"); 5811 } else 5812 *value = ret; 5813 return(val); 5814 } 5815 5816 /** 5817 * xmlParseNotationType: 5818 * @ctxt: an XML parser context 5819 * 5820 * parse an Notation attribute type. 5821 * 5822 * Note: the leading 'NOTATION' S part has already being parsed... 5823 * 5824 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5825 * 5826 * [ VC: Notation Attributes ] 5827 * Values of this type must match one of the notation names included 5828 * in the declaration; all notation names in the declaration must be declared. 
5829 * 5830 * Returns: the notation attribute tree built while parsing 5831 */ 5832 5833 xmlEnumerationPtr 5834 xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5835 const xmlChar *name; 5836 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5837 5838 if (RAW != '(') { 5839 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5840 return(NULL); 5841 } 5842 SHRINK; 5843 do { 5844 NEXT; 5845 SKIP_BLANKS; 5846 name = xmlParseName(ctxt); 5847 if (name == NULL) { 5848 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5849 "Name expected in NOTATION declaration\n"); 5850 xmlFreeEnumeration(ret); 5851 return(NULL); 5852 } 5853 tmp = ret; 5854 while (tmp != NULL) { 5855 if (xmlStrEqual(name, tmp->name)) { 5856 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5857 "standalone: attribute notation value token %s duplicated\n", 5858 name, NULL); 5859 if (!xmlDictOwns(ctxt->dict, name)) 5860 xmlFree((xmlChar *) name); 5861 break; 5862 } 5863 tmp = tmp->next; 5864 } 5865 if (tmp == NULL) { 5866 cur = xmlCreateEnumeration(name); 5867 if (cur == NULL) { 5868 xmlFreeEnumeration(ret); 5869 return(NULL); 5870 } 5871 if (last == NULL) ret = last = cur; 5872 else { 5873 last->next = cur; 5874 last = cur; 5875 } 5876 } 5877 SKIP_BLANKS; 5878 } while (RAW == '|'); 5879 if (RAW != ')') { 5880 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5881 xmlFreeEnumeration(ret); 5882 return(NULL); 5883 } 5884 NEXT; 5885 return(ret); 5886 } 5887 5888 /** 5889 * xmlParseEnumerationType: 5890 * @ctxt: an XML parser context 5891 * 5892 * parse an Enumeration attribute type. 5893 * 5894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')'
 *
 * [ VC: Enumeration ]
 * Values of this type must match one of the Nmtoken tokens in
 * the declaration
 *
 * Returns: the enumeration attribute tree built while parsing. When a
 *          token is missing or the closing ')' is not found, the tokens
 *          collected so far are returned (possibly NULL).
 */

xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
    xmlEnumerationPtr head = NULL, tail = NULL;

    if (RAW != '(') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
        return(NULL);
    }
    SHRINK;

    /* Each pass consumes the pending '(' or '|' and then one Nmtoken. */
    do {
        xmlChar *token;
        xmlEnumerationPtr dup, item = NULL;

        NEXT;
        SKIP_BLANKS;
        token = xmlParseNmtoken(ctxt);
        if (token == NULL) {
            xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
            return(head);
        }

        /* Look for an earlier entry carrying the same token. */
        for (dup = head; dup != NULL; dup = dup->next) {
            if (xmlStrEqual(token, dup->name))
                break;
        }

        if (dup != NULL) {
            xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
          "standalone: attribute enumeration value token %s duplicated\n",
                             token, NULL);
        } else {
            item = xmlCreateEnumeration(token);
        }

        /* The token string is released on both paths unless dict-owned. */
        if (!xmlDictOwns(ctxt->dict, token))
            xmlFree(token);

        if (dup == NULL) {
            if (item == NULL) {
                xmlFreeEnumeration(head);
                return(NULL);
            }
            if (tail == NULL)
                head = item;
            else
                tail->next = item;
            tail = item;
        }
        SKIP_BLANKS;
    } while (RAW == '|');

    if (RAW != ')') {
        xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
        return(head);
    }
    NEXT;
    return(head);
}

/**
 * xmlParseEnumeratedType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * Parse an Enumerated attribute type.
 *
 * [57] EnumeratedType ::= NotationType | Enumeration
 *
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')'
 *
 *
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 when
 *          no enumeration tree could be built
 */

int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
    int kind;

    if (!CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
        /* No 'NOTATION' keyword: a plain '(' Nmtoken ... ')' list. */
        *tree = xmlParseEnumerationType(ctxt);
        kind = XML_ATTRIBUTE_ENUMERATION;
    } else {
        SKIP(8);
        if (!IS_BLANK_CH(CUR)) {
            /* *tree is left untouched on this path. */
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after 'NOTATION'\n");
            return(0);
        }
        SKIP_BLANKS;
        *tree = xmlParseNotationType(ctxt);
        kind = XML_ATTRIBUTE_NOTATION;
    }

    /* An empty tree is reported to the caller as 0. */
    if (*tree == NULL)
        return(0);
    return(kind);
}

/**
 * xmlParseAttributeType:
 * @ctxt:  an XML parser context
 * @tree:  the enumeration tree built while parsing
 *
 * parse the Attribute list def for an element
 *
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
 *
 * [55] StringType ::= 'CDATA'
 *
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 *
 * Validity constraints for attribute values syntax are checked in
 * xmlValidateAttributeValue()
 *
 * [ VC: ID ]
 * Values of type ID must match the Name production. A name must not
 * appear more than once in an XML document as a value of this type;
 * i.e., ID values must uniquely identify the elements which bear them.
 *
 * [ VC: One ID per Element Type ]
 * No element type may have more than one ID attribute specified.
 *
 * [ VC: ID Attribute Default ]
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
 *
 * [ VC: IDREF ]
 * Values of type IDREF must match the Name production, and values
 * of type IDREFS must match Names; each IDREF Name must match the value
 * of an ID attribute on some element in the XML document; i.e.
IDREF 6023 * values must match the value of some ID attribute. 6024 * 6025 * [ VC: Entity Name ] 6026 * Values of type ENTITY must match the Name production, values 6027 * of type ENTITIES must match Names; each Entity Name must match the 6028 * name of an unparsed entity declared in the DTD. 6029 * 6030 * [ VC: Name Token ] 6031 * Values of type NMTOKEN must match the Nmtoken production; values 6032 * of type NMTOKENS must match Nmtokens. 6033 * 6034 * Returns the attribute type 6035 */ 6036 int 6037 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 6038 SHRINK; 6039 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 6040 SKIP(5); 6041 return(XML_ATTRIBUTE_CDATA); 6042 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 6043 SKIP(6); 6044 return(XML_ATTRIBUTE_IDREFS); 6045 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 6046 SKIP(5); 6047 return(XML_ATTRIBUTE_IDREF); 6048 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 6049 SKIP(2); 6050 return(XML_ATTRIBUTE_ID); 6051 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 6052 SKIP(6); 6053 return(XML_ATTRIBUTE_ENTITY); 6054 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 6055 SKIP(8); 6056 return(XML_ATTRIBUTE_ENTITIES); 6057 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 6058 SKIP(8); 6059 return(XML_ATTRIBUTE_NMTOKENS); 6060 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 6061 SKIP(7); 6062 return(XML_ATTRIBUTE_NMTOKEN); 6063 } 6064 return(xmlParseEnumeratedType(ctxt, tree)); 6065 } 6066 6067 /** 6068 * xmlParseAttributeListDecl: 6069 * @ctxt: an XML parser context 6070 * 6071 * : parse the Attribute list def for an element 6072 * 6073 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;
    xmlParserInputPtr startInput;

    /* Nothing to do unless the input is positioned on '<!ATTLIST'. */
    if (!CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T'))
        return;

    /* Remembered for the entity-boundary check on the closing '>'. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after '<!ATTLIST'\n");
    }
    SKIP_BLANKS;
    elemName = xmlParseName(ctxt);
    if (elemName == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "ATTLIST: no name for Element\n");
        return;
    }
    SKIP_BLANKS;
    GROW;

    /* One AttDef per iteration: Name S AttType S DefaultDecl. */
    while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
        const xmlChar *startPos = CUR_PTR;
        const xmlChar *attrName;
        xmlEnumerationPtr enumTree = NULL;
        xmlChar *defValue = NULL;
        int attType;
        int defKind;

        GROW;
        attrName = xmlParseName(ctxt);
        if (attrName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Attribute\n");
            break;
        }
        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                    "Space required after the attribute name\n");
            break;
        }
        SKIP_BLANKS;

        attType = xmlParseAttributeType(ctxt, &enumTree);
        if (attType <= 0)
            break;

        GROW;
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the attribute type\n");
            if (enumTree != NULL)
                xmlFreeEnumeration(enumTree);
            break;
        }
        SKIP_BLANKS;

        defKind = xmlParseDefaultDecl(ctxt, &defValue);
        if (defKind <= 0) {
            if (defValue != NULL)
                xmlFree(defValue);
            if (enumTree != NULL)
                xmlFreeEnumeration(enumTree);
            break;
        }
        /* Defaults of every type but CDATA get space-normalized in place. */
        if ((defValue != NULL) && (attType != XML_ATTRIBUTE_CDATA))
            xmlAttrNormalizeSpace(defValue, defValue);

        GROW;
        if (RAW != '>') {
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                    "Space required after the attribute default value\n");
                if (defValue != NULL)
                    xmlFree(defValue);
                if (enumTree != NULL)
                    xmlFreeEnumeration(enumTree);
                break;
            }
            SKIP_BLANKS;
        }

        /* Guard against looping forever when no input was consumed. */
        if (startPos == CUR_PTR) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                        "in xmlParseAttributeListDecl\n");
            if (defValue != NULL)
                xmlFree(defValue);
            if (enumTree != NULL)
                xmlFreeEnumeration(enumTree);
            break;
        }

        /*
         * The enumeration tree is either passed to the SAX callback or
         * freed here; it is not touched again afterwards.
         */
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
            (ctxt->sax->attributeDecl != NULL)) {
            ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                     attType, defKind, defValue, enumTree);
        } else if (enumTree != NULL) {
            xmlFreeEnumeration(enumTree);
        }

        if (ctxt->sax2) {
            /* Only defaults that carry an actual value are recorded. */
            if ((defValue != NULL) &&
                (defKind != XML_ATTRIBUTE_IMPLIED) &&
                (defKind != XML_ATTRIBUTE_REQUIRED)) {
                xmlAddDefAttrs(ctxt, elemName, attrName, defValue);
            }
            xmlAddSpecialAttr(ctxt, elemName, attrName, attType);
        }

        if (defValue != NULL)
            xmlFree(defValue);
        GROW;
    }

    if (RAW != '>')
        return;

    if (startInput != ctxt->input) {
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                         NULL, NULL);
    }
    NEXT;
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S?
')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
 *
 * returns: the list of the xmlElementContentPtr describing the element
 *          choices, or NULL in case of error. On every error path the
 *          partially built tree is released before returning.
 */
xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
    xmlElementContentPtr ret = NULL, cur = NULL, n;
    const xmlChar *elem = NULL;

    GROW;
    if (!CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
        xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
        return(NULL);
    }

    SKIP(7);
    SKIP_BLANKS;
    SHRINK;

    /* Simple form: '(' S? '#PCDATA' S? ')' with an optional trailing '*'. */
    if (RAW == ')') {
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        NEXT;
        ret = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                      XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
        if (RAW == '*') {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        }
        return(ret);
    }

    if ((RAW == '(') || (RAW == '|')) {
        ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                            XML_ELEMENT_CONTENT_PCDATA);
        if (ret == NULL)
            return(NULL);
    }

    /*
     * Each '|' adds one OR node. `elem` holds the name parsed in the
     * previous pass (NULL on the first pass); it is attached as c1 of the
     * OR node created in the following pass, or as the final c2 after the
     * loop.
     */
    while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
        NEXT;
        n = xmlNewDocElementContent(ctxt->myDoc, NULL,
                                    XML_ELEMENT_CONTENT_OR);
        if (n == NULL) {
            /* Release what was built so far instead of leaking it. */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        if (elem == NULL) {
            /* First '|': the #PCDATA leaf goes under a new OR root. */
            n->c1 = cur;
            if (cur != NULL)
                cur->parent = n;
            ret = cur = n;
        } else {
            n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                            XML_ELEMENT_CONTENT_ELEMENT);
            if (n->c1 != NULL)
                n->c1->parent = n;
            cur->c2 = n;
            n->parent = cur;
            cur = n;
        }
        SKIP_BLANKS;
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                    "xmlParseElementMixedContentDecl : Name expected\n");
            /*
             * Free from the root: freeing only `cur` would leak every
             * node above it and leave its parent with a dangling c2.
             */
            xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        SKIP_BLANKS;
        GROW;
    }

    if ((RAW == ')') && (NXT(1) == '*')) {
        if (elem != NULL) {
            /* The last parsed name closes the chain as c2. */
            cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
                                              XML_ELEMENT_CONTENT_ELEMENT);
            if (cur->c2 != NULL)
                cur->c2->parent = cur;
        }
        if (ret != NULL)
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
        if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
            xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
"Element content declaration doesn't start and stop in the same entity\n",
                             NULL, NULL);
        }
        SKIP(2);
    } else {
        xmlFreeDocElementContent(ctxt->myDoc, ret);
        xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
        return(NULL);
    }
    return(ret);
}

/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for a children Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups.
That is to say, if either of the 6329 * opening or closing parentheses in a choice, seq, or Mixed 6330 * construct is contained in the replacement text for a parameter 6331 * entity, both must be contained in the same replacement text. For 6332 * interoperability, if a parameter-entity reference appears in a 6333 * choice, seq, or Mixed construct, its replacement text should not 6334 * be empty, and neither the first nor last non-blank character of 6335 * the replacement text should be a connector (| or ,). 6336 * 6337 * Returns the tree of xmlElementContentPtr describing the element 6338 * hierarchy. 6339 */ 6340 static xmlElementContentPtr 6341 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 6342 int depth) { 6343 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 6344 const xmlChar *elem; 6345 xmlChar type = 0; 6346 6347 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 6348 (depth > 2048)) { 6349 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 6350 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 6351 depth); 6352 return(NULL); 6353 } 6354 SKIP_BLANKS; 6355 GROW; 6356 if (RAW == '(') { 6357 int inputid = ctxt->input->id; 6358 6359 /* Recurse on first child */ 6360 NEXT; 6361 SKIP_BLANKS; 6362 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6363 depth + 1); 6364 SKIP_BLANKS; 6365 GROW; 6366 } else { 6367 elem = xmlParseName(ctxt); 6368 if (elem == NULL) { 6369 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6370 return(NULL); 6371 } 6372 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6373 if (cur == NULL) { 6374 xmlErrMemory(ctxt, NULL); 6375 return(NULL); 6376 } 6377 GROW; 6378 if (RAW == '?') { 6379 cur->ocur = XML_ELEMENT_CONTENT_OPT; 6380 NEXT; 6381 } else if (RAW == '*') { 6382 cur->ocur = XML_ELEMENT_CONTENT_MULT; 6383 NEXT; 6384 } else if (RAW == '+') { 6385 cur->ocur = 
XML_ELEMENT_CONTENT_PLUS; 6386 NEXT; 6387 } else { 6388 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 6389 } 6390 GROW; 6391 } 6392 SKIP_BLANKS; 6393 SHRINK; 6394 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) { 6395 /* 6396 * Each loop we parse one separator and one element. 6397 */ 6398 if (RAW == ',') { 6399 if (type == 0) type = CUR; 6400 6401 /* 6402 * Detect "Name | Name , Name" error 6403 */ 6404 else if (type != CUR) { 6405 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6406 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6407 type); 6408 if ((last != NULL) && (last != ret)) 6409 xmlFreeDocElementContent(ctxt->myDoc, last); 6410 if (ret != NULL) 6411 xmlFreeDocElementContent(ctxt->myDoc, ret); 6412 return(NULL); 6413 } 6414 NEXT; 6415 6416 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 6417 if (op == NULL) { 6418 if ((last != NULL) && (last != ret)) 6419 xmlFreeDocElementContent(ctxt->myDoc, last); 6420 xmlFreeDocElementContent(ctxt->myDoc, ret); 6421 return(NULL); 6422 } 6423 if (last == NULL) { 6424 op->c1 = ret; 6425 if (ret != NULL) 6426 ret->parent = op; 6427 ret = cur = op; 6428 } else { 6429 cur->c2 = op; 6430 if (op != NULL) 6431 op->parent = cur; 6432 op->c1 = last; 6433 if (last != NULL) 6434 last->parent = op; 6435 cur =op; 6436 last = NULL; 6437 } 6438 } else if (RAW == '|') { 6439 if (type == 0) type = CUR; 6440 6441 /* 6442 * Detect "Name , Name | Name" error 6443 */ 6444 else if (type != CUR) { 6445 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 6446 "xmlParseElementChildrenContentDecl : '%c' expected\n", 6447 type); 6448 if ((last != NULL) && (last != ret)) 6449 xmlFreeDocElementContent(ctxt->myDoc, last); 6450 if (ret != NULL) 6451 xmlFreeDocElementContent(ctxt->myDoc, ret); 6452 return(NULL); 6453 } 6454 NEXT; 6455 6456 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 6457 if (op == NULL) { 6458 if ((last != NULL) && (last != ret)) 6459 
xmlFreeDocElementContent(ctxt->myDoc, last); 6460 if (ret != NULL) 6461 xmlFreeDocElementContent(ctxt->myDoc, ret); 6462 return(NULL); 6463 } 6464 if (last == NULL) { 6465 op->c1 = ret; 6466 if (ret != NULL) 6467 ret->parent = op; 6468 ret = cur = op; 6469 } else { 6470 cur->c2 = op; 6471 if (op != NULL) 6472 op->parent = cur; 6473 op->c1 = last; 6474 if (last != NULL) 6475 last->parent = op; 6476 cur =op; 6477 last = NULL; 6478 } 6479 } else { 6480 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 6481 if ((last != NULL) && (last != ret)) 6482 xmlFreeDocElementContent(ctxt->myDoc, last); 6483 if (ret != NULL) 6484 xmlFreeDocElementContent(ctxt->myDoc, ret); 6485 return(NULL); 6486 } 6487 GROW; 6488 SKIP_BLANKS; 6489 GROW; 6490 if (RAW == '(') { 6491 int inputid = ctxt->input->id; 6492 /* Recurse on second child */ 6493 NEXT; 6494 SKIP_BLANKS; 6495 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 6496 depth + 1); 6497 SKIP_BLANKS; 6498 } else { 6499 elem = xmlParseName(ctxt); 6500 if (elem == NULL) { 6501 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 6502 if (ret != NULL) 6503 xmlFreeDocElementContent(ctxt->myDoc, ret); 6504 return(NULL); 6505 } 6506 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6507 if (last == NULL) { 6508 if (ret != NULL) 6509 xmlFreeDocElementContent(ctxt->myDoc, ret); 6510 return(NULL); 6511 } 6512 if (RAW == '?') { 6513 last->ocur = XML_ELEMENT_CONTENT_OPT; 6514 NEXT; 6515 } else if (RAW == '*') { 6516 last->ocur = XML_ELEMENT_CONTENT_MULT; 6517 NEXT; 6518 } else if (RAW == '+') { 6519 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6520 NEXT; 6521 } else { 6522 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6523 } 6524 } 6525 SKIP_BLANKS; 6526 GROW; 6527 } 6528 if ((cur != NULL) && (last != NULL)) { 6529 cur->c2 = last; 6530 if (last != NULL) 6531 last->parent = cur; 6532 } 6533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6535 
"Element content declaration doesn't start and stop in the same entity\n", 6536 NULL, NULL); 6537 } 6538 NEXT; 6539 if (RAW == '?') { 6540 if (ret != NULL) { 6541 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6542 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6543 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6544 else 6545 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6546 } 6547 NEXT; 6548 } else if (RAW == '*') { 6549 if (ret != NULL) { 6550 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6551 cur = ret; 6552 /* 6553 * Some normalization: 6554 * (a | b* | c?)* == (a | b | c)* 6555 */ 6556 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6557 if ((cur->c1 != NULL) && 6558 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6559 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6560 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6561 if ((cur->c2 != NULL) && 6562 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6563 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6564 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6565 cur = cur->c2; 6566 } 6567 } 6568 NEXT; 6569 } else if (RAW == '+') { 6570 if (ret != NULL) { 6571 int found = 0; 6572 6573 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6574 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6575 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6576 else 6577 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6578 /* 6579 * Some normalization: 6580 * (a | b*)+ == (a | b)* 6581 * (a | b?)+ == (a | b)* 6582 */ 6583 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6584 if ((cur->c1 != NULL) && 6585 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6586 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6587 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6588 found = 1; 6589 } 6590 if ((cur->c2 != NULL) && 6591 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6592 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6593 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6594 found = 1; 6595 } 6596 cur = cur->c2; 6597 } 6598 if (found) 6599 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6600 } 6601 NEXT; 6602 
} 6603 return(ret); 6604 } 6605 6606 /** 6607 * xmlParseElementChildrenContentDecl: 6608 * @ctxt: an XML parser context 6609 * @inputchk: the input used for the current entity, needed for boundary checks 6610 * 6611 * parse the declaration for a Mixed Element content 6612 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 6613 * 6614 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6615 * 6616 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6617 * 6618 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6619 * 6620 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6621 * 6622 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6623 * TODO Parameter-entity replacement text must be properly nested 6624 * with parenthesized groups. That is to say, if either of the 6625 * opening or closing parentheses in a choice, seq, or Mixed 6626 * construct is contained in the replacement text for a parameter 6627 * entity, both must be contained in the same replacement text. For 6628 * interoperability, if a parameter-entity reference appears in a 6629 * choice, seq, or Mixed construct, its replacement text should not 6630 * be empty, and neither the first nor last non-blank character of 6631 * the replacement text should be a connector (| or ,). 6632 * 6633 * Returns the tree of xmlElementContentPtr describing the element 6634 * hierarchy. 6635 */ 6636 xmlElementContentPtr 6637 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6638 /* stub left for API/ABI compat */ 6639 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6640 } 6641 6642 /** 6643 * xmlParseElementContentDecl: 6644 * @ctxt: an XML parser context 6645 * @name: the name of the element being defined. 
6646 * @result: the Element Content pointer will be stored here if any 6647 * 6648 * parse the declaration for an Element content either Mixed or Children, 6649 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6650 * 6651 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6652 * 6653 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6654 */ 6655 6656 int 6657 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6658 xmlElementContentPtr *result) { 6659 6660 xmlElementContentPtr tree = NULL; 6661 int inputid = ctxt->input->id; 6662 int res; 6663 6664 *result = NULL; 6665 6666 if (RAW != '(') { 6667 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6668 "xmlParseElementContentDecl : %s '(' expected\n", name); 6669 return(-1); 6670 } 6671 NEXT; 6672 GROW; 6673 if (ctxt->instate == XML_PARSER_EOF) 6674 return(-1); 6675 SKIP_BLANKS; 6676 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6677 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6678 res = XML_ELEMENT_TYPE_MIXED; 6679 } else { 6680 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6681 res = XML_ELEMENT_TYPE_ELEMENT; 6682 } 6683 SKIP_BLANKS; 6684 *result = tree; 6685 return(res); 6686 } 6687 6688 /** 6689 * xmlParseElementDecl: 6690 * @ctxt: an XML parser context 6691 * 6692 * parse an Element declaration. 6693 * 6694 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>'
 *
 * [ VC: Unique Element Type Declaration ]
 * No element type may be declared more than once
 *
 * Returns the type of the element, or -1 in case of error
 */
int
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlElementContentPtr model = NULL;
    xmlParserInputPtr startInput;
    int elemType = -1;
    int notify;

    /* GROW; done in the caller */
    if (!CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T'))
        return(-1);

    /* Remembered for the entity-boundary check on the closing '>'. */
    startInput = ctxt->input;

    SKIP(9);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after 'ELEMENT'\n");
        return(-1);
    }
    SKIP_BLANKS;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "xmlParseElementDecl: no name for Element\n");
        return(-1);
    }
    /* Leave exhausted nested inputs before looking at the next char. */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    if (!IS_BLANK_CH(CUR)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Space required after the element name\n");
    }
    SKIP_BLANKS;

    /* contentspec ::= 'EMPTY' | 'ANY' | Mixed | children */
    if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
        /* Element must always be empty. */
        SKIP(5);
        elemType = XML_ELEMENT_TYPE_EMPTY;
    } else if ((RAW == 'A') && (NXT(1) == 'N') && (NXT(2) == 'Y')) {
        /* Element is a generic container. */
        SKIP(3);
        elemType = XML_ELEMENT_TYPE_ANY;
    } else if (RAW == '(') {
        elemType = xmlParseElementContentDecl(ctxt, name, &model);
    } else if ((RAW == '%') && (ctxt->external == 0) &&
               (ctxt->inputNr == 1)) {
        /*
         * [ WFC: PEs in Internal Subset ] error handling.
         */
        xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
          "PEReference: forbidden within markup decl in internal subset\n");
        return(-1);
    } else {
        xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
              "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
        return(-1);
    }

    SKIP_BLANKS;
    /*
     * Pop-up of finished entities.
     */
    while ((RAW == 0) && (ctxt->inputNr > 1))
        xmlPopInput(ctxt);
    SKIP_BLANKS;

    if (RAW != '>') {
        /* The type parsed so far is still returned on this path. */
        xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
        if (model != NULL)
            xmlFreeDocElementContent(ctxt->myDoc, model);
        return(elemType);
    }

    if (startInput != ctxt->input) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Element declaration doesn't start and stop in the same entity\n");
    }
    NEXT;

    notify = ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
              (ctxt->sax->elementDecl != NULL));
    if (notify) {
        if (model != NULL)
            model->parent = NULL;
        ctxt->sax->elementDecl(ctxt->userData, name, elemType, model);
    }
    /*
     * this is a trick: if xmlAddElementDecl is called,
     * instead of copying the full tree it is plugged directly
     * if called from the parser. Avoid duplicating the
     * interfaces or change the API/ABI.
     * A parent still NULL after the callback is taken to mean the tree
     * was not plugged in, so it is freed here; with no callback at all
     * it is always freed.
     */
    if ((model != NULL) && ((!notify) || (model->parent == NULL)))
        xmlFreeDocElementContent(ctxt->myDoc, model);

    return(elemType);
}

/**
 * xmlParseConditionalSections
 * @ctxt:  an XML parser context
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S?
'[' ignoreSectContents* ']]>' 6811 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6812 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6813 */ 6814 6815 static void 6816 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6817 int id = ctxt->input->id; 6818 6819 SKIP(3); 6820 SKIP_BLANKS; 6821 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6822 SKIP(7); 6823 SKIP_BLANKS; 6824 if (RAW != '[') { 6825 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6826 xmlHaltParser(ctxt); 6827 return; 6828 } else { 6829 if (ctxt->input->id != id) { 6830 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6831 "All markup of the conditional section is not in the same entity\n", 6832 NULL, NULL); 6833 } 6834 NEXT; 6835 } 6836 if (xmlParserDebugEntities) { 6837 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6838 xmlGenericError(xmlGenericErrorContext, 6839 "%s(%d): ", ctxt->input->filename, 6840 ctxt->input->line); 6841 xmlGenericError(xmlGenericErrorContext, 6842 "Entering INCLUDE Conditional Section\n"); 6843 } 6844 6845 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6846 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) { 6847 const xmlChar *check = CUR_PTR; 6848 unsigned int cons = ctxt->input->consumed; 6849 6850 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6851 xmlParseConditionalSections(ctxt); 6852 } else if (IS_BLANK_CH(CUR)) { 6853 NEXT; 6854 } else if (RAW == '%') { 6855 xmlParsePEReference(ctxt); 6856 } else 6857 xmlParseMarkupDecl(ctxt); 6858 6859 /* 6860 * Pop-up of finished entities. 
6861 */ 6862 while ((RAW == 0) && (ctxt->inputNr > 1)) 6863 xmlPopInput(ctxt); 6864 6865 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6867 xmlHaltParser(ctxt); 6868 break; 6869 } 6870 } 6871 if (xmlParserDebugEntities) { 6872 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6873 xmlGenericError(xmlGenericErrorContext, 6874 "%s(%d): ", ctxt->input->filename, 6875 ctxt->input->line); 6876 xmlGenericError(xmlGenericErrorContext, 6877 "Leaving INCLUDE Conditional Section\n"); 6878 } 6879 6880 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6881 int state; 6882 xmlParserInputState instate; 6883 int depth = 0; 6884 6885 SKIP(6); 6886 SKIP_BLANKS; 6887 if (RAW != '[') { 6888 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6889 xmlHaltParser(ctxt); 6890 return; 6891 } else { 6892 if (ctxt->input->id != id) { 6893 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6894 "All markup of the conditional section is not in the same entity\n", 6895 NULL, NULL); 6896 } 6897 NEXT; 6898 } 6899 if (xmlParserDebugEntities) { 6900 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6901 xmlGenericError(xmlGenericErrorContext, 6902 "%s(%d): ", ctxt->input->filename, 6903 ctxt->input->line); 6904 xmlGenericError(xmlGenericErrorContext, 6905 "Entering IGNORE Conditional Section\n"); 6906 } 6907 6908 /* 6909 * Parse up to the end of the conditional section 6910 * But disable SAX event generating DTD building in the meantime 6911 */ 6912 state = ctxt->disableSAX; 6913 instate = ctxt->instate; 6914 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6915 ctxt->instate = XML_PARSER_IGNORE; 6916 6917 while (((depth >= 0) && (RAW != 0)) && 6918 (ctxt->instate != XML_PARSER_EOF)) { 6919 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6920 depth++; 6921 SKIP(3); 6922 continue; 6923 } 6924 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6925 if (--depth >= 0) SKIP(3); 6926 continue; 6927 } 
6928 NEXT; 6929 continue; 6930 } 6931 6932 ctxt->disableSAX = state; 6933 ctxt->instate = instate; 6934 6935 if (xmlParserDebugEntities) { 6936 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6937 xmlGenericError(xmlGenericErrorContext, 6938 "%s(%d): ", ctxt->input->filename, 6939 ctxt->input->line); 6940 xmlGenericError(xmlGenericErrorContext, 6941 "Leaving IGNORE Conditional Section\n"); 6942 } 6943 6944 } else { 6945 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6946 xmlHaltParser(ctxt); 6947 return; 6948 } 6949 6950 if (RAW == 0) 6951 SHRINK; 6952 6953 if (RAW == 0) { 6954 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6955 } else { 6956 if (ctxt->input->id != id) { 6957 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6958 "All markup of the conditional section is not in the same entity\n", 6959 NULL, NULL); 6960 } 6961 if ((ctxt-> instate != XML_PARSER_EOF) && 6962 ((ctxt->input->cur + 3) <= ctxt->input->end)) 6963 SKIP(3); 6964 } 6965 } 6966 6967 /** 6968 * xmlParseMarkupDecl: 6969 * @ctxt: an XML parser context 6970 * 6971 * parse Markup declarations 6972 * 6973 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6974 * NotationDecl | PI | Comment 6975 * 6976 * [ VC: Proper Declaration/PE Nesting ] 6977 * Parameter-entity replacement text must be properly nested with 6978 * markup declarations. That is to say, if either the first character 6979 * or the last character of a markup declaration (markupdecl above) is 6980 * contained in the replacement text for a parameter-entity reference, 6981 * both must be contained in the same replacement text. 6982 * 6983 * [ WFC: PEs in Internal Subset ] 6984 * In the internal DTD subset, parameter-entity references can occur 6985 * only where markup declarations can occur, not within markup declarations. 6986 * (This does not apply to references that occur in external parameter 6987 * entities or to the external subset.) 
6988 */ 6989 void 6990 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6991 GROW; 6992 if (CUR == '<') { 6993 if (NXT(1) == '!') { 6994 switch (NXT(2)) { 6995 case 'E': 6996 if (NXT(3) == 'L') 6997 xmlParseElementDecl(ctxt); 6998 else if (NXT(3) == 'N') 6999 xmlParseEntityDecl(ctxt); 7000 break; 7001 case 'A': 7002 xmlParseAttributeListDecl(ctxt); 7003 break; 7004 case 'N': 7005 xmlParseNotationDecl(ctxt); 7006 break; 7007 case '-': 7008 xmlParseComment(ctxt); 7009 break; 7010 default: 7011 /* there is an error but it will be detected later */ 7012 break; 7013 } 7014 } else if (NXT(1) == '?') { 7015 xmlParsePI(ctxt); 7016 } 7017 } 7018 7019 /* 7020 * detect requirement to exit there and act accordingly 7021 * and avoid having instate overriden later on 7022 */ 7023 if (ctxt->instate == XML_PARSER_EOF) 7024 return; 7025 7026 /* 7027 * This is only for internal subset. On external entities, 7028 * the replacement is done before parsing stage 7029 */ 7030 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 7031 xmlParsePEReference(ctxt); 7032 7033 /* 7034 * Conditional sections are allowed from entities included 7035 * by PE References in the internal subset. 7036 */ 7037 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7039 xmlParseConditionalSections(ctxt); 7040 } 7041 } 7042 7043 ctxt->instate = XML_PARSER_DTD; 7044 } 7045 7046 /** 7047 * xmlParseTextDecl: 7048 * @ctxt: an XML parser context 7049 * 7050 * parse an XML declaration header for external entities 7051 * 7052 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 7053 */ 7054 7055 void 7056 xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 7057 xmlChar *version; 7058 const xmlChar *encoding; 7059 7060 /* 7061 * We know that '<?xml' is here. 
7062 */ 7063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 7064 SKIP(5); 7065 } else { 7066 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 7067 return; 7068 } 7069 7070 if (!IS_BLANK_CH(CUR)) { 7071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7072 "Space needed after '<?xml'\n"); 7073 } 7074 SKIP_BLANKS; 7075 7076 /* 7077 * We may have the VersionInfo here. 7078 */ 7079 version = xmlParseVersionInfo(ctxt); 7080 if (version == NULL) 7081 version = xmlCharStrdup(XML_DEFAULT_VERSION); 7082 else { 7083 if (!IS_BLANK_CH(CUR)) { 7084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7085 "Space needed here\n"); 7086 } 7087 } 7088 ctxt->input->version = version; 7089 7090 /* 7091 * We must have the encoding declaration 7092 */ 7093 encoding = xmlParseEncodingDecl(ctxt); 7094 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7095 /* 7096 * The XML REC instructs us to stop parsing right here 7097 */ 7098 return; 7099 } 7100 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 7101 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 7102 "Missing encoding in text declaration\n"); 7103 } 7104 7105 SKIP_BLANKS; 7106 if ((RAW == '?') && (NXT(1) == '>')) { 7107 SKIP(2); 7108 } else if (RAW == '>') { 7109 /* Deprecated old WD ... */ 7110 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7111 NEXT; 7112 } else { 7113 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 7114 MOVETO_ENDTAG(CUR_PTR); 7115 NEXT; 7116 } 7117 } 7118 7119 /** 7120 * xmlParseExternalSubset: 7121 * @ctxt: an XML parser context 7122 * @ExternalID: the external identifier 7123 * @SystemID: the system identifier (or URL) 7124 * 7125 * parse Markup declarations from an external subset 7126 * 7127 * [30] extSubset ::= textDecl? 
extSubsetDecl 7128 * 7129 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 7130 */ 7131 void 7132 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 7133 const xmlChar *SystemID) { 7134 xmlDetectSAX2(ctxt); 7135 GROW; 7136 7137 if ((ctxt->encoding == NULL) && 7138 (ctxt->input->end - ctxt->input->cur >= 4)) { 7139 xmlChar start[4]; 7140 xmlCharEncoding enc; 7141 7142 start[0] = RAW; 7143 start[1] = NXT(1); 7144 start[2] = NXT(2); 7145 start[3] = NXT(3); 7146 enc = xmlDetectCharEncoding(start, 4); 7147 if (enc != XML_CHAR_ENCODING_NONE) 7148 xmlSwitchEncoding(ctxt, enc); 7149 } 7150 7151 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 7152 xmlParseTextDecl(ctxt); 7153 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 7154 /* 7155 * The XML REC instructs us to stop parsing right here 7156 */ 7157 xmlHaltParser(ctxt); 7158 return; 7159 } 7160 } 7161 if (ctxt->myDoc == NULL) { 7162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 7163 if (ctxt->myDoc == NULL) { 7164 xmlErrMemory(ctxt, "New Doc failed"); 7165 return; 7166 } 7167 ctxt->myDoc->properties = XML_DOC_INTERNAL; 7168 } 7169 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 7170 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 7171 7172 ctxt->instate = XML_PARSER_DTD; 7173 ctxt->external = 1; 7174 while (((RAW == '<') && (NXT(1) == '?')) || 7175 ((RAW == '<') && (NXT(1) == '!')) || 7176 (RAW == '%') || IS_BLANK_CH(CUR)) { 7177 const xmlChar *check = CUR_PTR; 7178 unsigned int cons = ctxt->input->consumed; 7179 7180 GROW; 7181 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 7182 xmlParseConditionalSections(ctxt); 7183 } else if (IS_BLANK_CH(CUR)) { 7184 NEXT; 7185 } else if (RAW == '%') { 7186 xmlParsePEReference(ctxt); 7187 } else 7188 xmlParseMarkupDecl(ctxt); 7189 7190 /* 7191 * Pop-up of finished entities. 
7192 */ 7193 while ((RAW == 0) && (ctxt->inputNr > 1)) 7194 xmlPopInput(ctxt); 7195 7196 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7197 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7198 break; 7199 } 7200 } 7201 7202 if (RAW != 0) { 7203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 7204 } 7205 7206 } 7207 7208 /** 7209 * xmlParseReference: 7210 * @ctxt: an XML parser context 7211 * 7212 * parse and handle entity references in content, depending on the SAX 7213 * interface, this may end-up in a call to character() if this is a 7214 * CharRef, a predefined entity, if there is no reference() callback. 7215 * or if the parser was asked to switch to that mode. 7216 * 7217 * [67] Reference ::= EntityRef | CharRef 7218 */ 7219 void 7220 xmlParseReference(xmlParserCtxtPtr ctxt) { 7221 xmlEntityPtr ent; 7222 xmlChar *val; 7223 int was_checked; 7224 xmlNodePtr list = NULL; 7225 xmlParserErrors ret = XML_ERR_OK; 7226 7227 7228 if (RAW != '&') 7229 return; 7230 7231 /* 7232 * Simple case of a CharRef 7233 */ 7234 if (NXT(1) == '#') { 7235 int i = 0; 7236 xmlChar out[10]; 7237 int hex = NXT(2); 7238 int value = xmlParseCharRef(ctxt); 7239 7240 if (value == 0) 7241 return; 7242 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 7243 /* 7244 * So we are using non-UTF-8 buffers 7245 * Check that the char fit on 8bits, if not 7246 * generate a CharRef. 
7247 */ 7248 if (value <= 0xFF) { 7249 out[0] = value; 7250 out[1] = 0; 7251 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7252 (!ctxt->disableSAX)) 7253 ctxt->sax->characters(ctxt->userData, out, 1); 7254 } else { 7255 if ((hex == 'x') || (hex == 'X')) 7256 snprintf((char *)out, sizeof(out), "#x%X", value); 7257 else 7258 snprintf((char *)out, sizeof(out), "#%d", value); 7259 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7260 (!ctxt->disableSAX)) 7261 ctxt->sax->reference(ctxt->userData, out); 7262 } 7263 } else { 7264 /* 7265 * Just encode the value in UTF-8 7266 */ 7267 COPY_BUF(0 ,out, i, value); 7268 out[i] = 0; 7269 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7270 (!ctxt->disableSAX)) 7271 ctxt->sax->characters(ctxt->userData, out, i); 7272 } 7273 return; 7274 } 7275 7276 /* 7277 * We are seeing an entity reference 7278 */ 7279 ent = xmlParseEntityRef(ctxt); 7280 if (ent == NULL) return; 7281 if (!ctxt->wellFormed) 7282 return; 7283 was_checked = ent->checked; 7284 7285 /* special case of predefined entities */ 7286 if ((ent->name == NULL) || 7287 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 7288 val = ent->content; 7289 if (val == NULL) return; 7290 /* 7291 * inline the entity. 7292 */ 7293 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 7294 (!ctxt->disableSAX)) 7295 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 7296 return; 7297 } 7298 7299 /* 7300 * The first reference to the entity trigger a parsing phase 7301 * where the ent->children is filled with the result from 7302 * the parsing. 7303 * Note: external parsed entities will not be loaded, it is not 7304 * required for a non-validating parser, unless the parsing option 7305 * of validating, or substituting entities were given. Doing so is 7306 * far more secure as the parser will only process data coming from 7307 * the document entity by default. 
7308 */ 7309 if (((ent->checked == 0) || 7310 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) && 7311 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) || 7312 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) { 7313 unsigned long oldnbent = ctxt->nbentities; 7314 7315 /* 7316 * This is a bit hackish but this seems the best 7317 * way to make sure both SAX and DOM entity support 7318 * behaves okay. 7319 */ 7320 void *user_data; 7321 if (ctxt->userData == ctxt) 7322 user_data = NULL; 7323 else 7324 user_data = ctxt->userData; 7325 7326 /* 7327 * Check that this entity is well formed 7328 * 4.3.2: An internal general parsed entity is well-formed 7329 * if its replacement text matches the production labeled 7330 * content. 7331 */ 7332 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7333 ctxt->depth++; 7334 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 7335 user_data, &list); 7336 ctxt->depth--; 7337 7338 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7339 ctxt->depth++; 7340 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 7341 user_data, ctxt->depth, ent->URI, 7342 ent->ExternalID, &list); 7343 ctxt->depth--; 7344 } else { 7345 ret = XML_ERR_ENTITY_PE_INTERNAL; 7346 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7347 "invalid entity type found\n", NULL); 7348 } 7349 7350 /* 7351 * Store the number of entities needing parsing for this entity 7352 * content and do checkings 7353 */ 7354 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; 7355 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<'))) 7356 ent->checked |= 1; 7357 if (ret == XML_ERR_ENTITY_LOOP) { 7358 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7359 xmlFreeNodeList(list); 7360 return; 7361 } 7362 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { 7363 xmlFreeNodeList(list); 7364 return; 7365 } 7366 7367 if ((ret == XML_ERR_OK) && (list != NULL)) { 7368 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 7369 (ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 7370 (ent->children == NULL)) { 7371 ent->children = list; 7372 if (ctxt->replaceEntities) { 7373 /* 7374 * Prune it directly in the generated document 7375 * except for single text nodes. 7376 */ 7377 if (((list->type == XML_TEXT_NODE) && 7378 (list->next == NULL)) || 7379 (ctxt->parseMode == XML_PARSE_READER)) { 7380 list->parent = (xmlNodePtr) ent; 7381 list = NULL; 7382 ent->owner = 1; 7383 } else { 7384 ent->owner = 0; 7385 while (list != NULL) { 7386 list->parent = (xmlNodePtr) ctxt->node; 7387 list->doc = ctxt->myDoc; 7388 if (list->next == NULL) 7389 ent->last = list; 7390 list = list->next; 7391 } 7392 list = ent->children; 7393 #ifdef LIBXML_LEGACY_ENABLED 7394 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7395 xmlAddEntityReference(ent, list, NULL); 7396 #endif /* LIBXML_LEGACY_ENABLED */ 7397 } 7398 } else { 7399 ent->owner = 1; 7400 while (list != NULL) { 7401 list->parent = (xmlNodePtr) ent; 7402 xmlSetTreeDoc(list, ent->doc); 7403 if (list->next == NULL) 7404 ent->last = list; 7405 list = list->next; 7406 } 7407 } 7408 } else { 7409 xmlFreeNodeList(list); 7410 list = NULL; 7411 } 7412 } else if ((ret != XML_ERR_OK) && 7413 (ret != XML_WAR_UNDECLARED_ENTITY)) { 7414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7415 "Entity '%s' failed to parse\n", ent->name); 7416 xmlParserEntityCheck(ctxt, 0, ent, 0); 7417 } else if (list != NULL) { 7418 xmlFreeNodeList(list); 7419 list = NULL; 7420 } 7421 if (ent->checked == 0) 7422 ent->checked = 2; 7423 } else if (ent->checked != 1) { 7424 ctxt->nbentities += ent->checked / 2; 7425 } 7426 7427 /* 7428 * Now that the entity content has been gathered 7429 * provide it to the application, this can take different forms based 7430 * on the parsing modes. 7431 */ 7432 if (ent->children == NULL) { 7433 /* 7434 * Probably running in SAX mode and the callbacks don't 7435 * build the entity content. 
So unless we already went 7436 * though parsing for first checking go though the entity 7437 * content to generate callbacks associated to the entity 7438 */ 7439 if (was_checked != 0) { 7440 void *user_data; 7441 /* 7442 * This is a bit hackish but this seems the best 7443 * way to make sure both SAX and DOM entity support 7444 * behaves okay. 7445 */ 7446 if (ctxt->userData == ctxt) 7447 user_data = NULL; 7448 else 7449 user_data = ctxt->userData; 7450 7451 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 7452 ctxt->depth++; 7453 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 7454 ent->content, user_data, NULL); 7455 ctxt->depth--; 7456 } else if (ent->etype == 7457 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 7458 ctxt->depth++; 7459 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 7460 ctxt->sax, user_data, ctxt->depth, 7461 ent->URI, ent->ExternalID, NULL); 7462 ctxt->depth--; 7463 } else { 7464 ret = XML_ERR_ENTITY_PE_INTERNAL; 7465 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 7466 "invalid entity type found\n", NULL); 7467 } 7468 if (ret == XML_ERR_ENTITY_LOOP) { 7469 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 7470 return; 7471 } 7472 } 7473 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7474 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7475 /* 7476 * Entity reference callback comes second, it's somewhat 7477 * superfluous but a compatibility to historical behaviour 7478 */ 7479 ctxt->sax->reference(ctxt->userData, ent->name); 7480 } 7481 return; 7482 } 7483 7484 /* 7485 * If we didn't get any children for the entity being built 7486 */ 7487 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 7488 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 7489 /* 7490 * Create a node. 
7491 */ 7492 ctxt->sax->reference(ctxt->userData, ent->name); 7493 return; 7494 } 7495 7496 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 7497 /* 7498 * There is a problem on the handling of _private for entities 7499 * (bug 155816): Should we copy the content of the field from 7500 * the entity (possibly overwriting some value set by the user 7501 * when a copy is created), should we leave it alone, or should 7502 * we try to take care of different situations? The problem 7503 * is exacerbated by the usage of this field by the xmlReader. 7504 * To fix this bug, we look at _private on the created node 7505 * and, if it's NULL, we copy in whatever was in the entity. 7506 * If it's not NULL we leave it alone. This is somewhat of a 7507 * hack - maybe we should have further tests to determine 7508 * what to do. 7509 */ 7510 if ((ctxt->node != NULL) && (ent->children != NULL)) { 7511 /* 7512 * Seems we are generating the DOM content, do 7513 * a simple tree copy for all references except the first 7514 * In the first occurrence list contains the replacement. 7515 */ 7516 if (((list == NULL) && (ent->owner == 0)) || 7517 (ctxt->parseMode == XML_PARSE_READER)) { 7518 xmlNodePtr nw = NULL, cur, firstChild = NULL; 7519 7520 /* 7521 * We are copying here, make sure there is no abuse 7522 */ 7523 ctxt->sizeentcopy += ent->length + 5; 7524 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7525 return; 7526 7527 /* 7528 * when operating on a reader, the entities definitions 7529 * are always owning the entities subtree. 
7530 if (ctxt->parseMode == XML_PARSE_READER) 7531 ent->owner = 1; 7532 */ 7533 7534 cur = ent->children; 7535 while (cur != NULL) { 7536 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7537 if (nw != NULL) { 7538 if (nw->_private == NULL) 7539 nw->_private = cur->_private; 7540 if (firstChild == NULL){ 7541 firstChild = nw; 7542 } 7543 nw = xmlAddChild(ctxt->node, nw); 7544 } 7545 if (cur == ent->last) { 7546 /* 7547 * needed to detect some strange empty 7548 * node cases in the reader tests 7549 */ 7550 if ((ctxt->parseMode == XML_PARSE_READER) && 7551 (nw != NULL) && 7552 (nw->type == XML_ELEMENT_NODE) && 7553 (nw->children == NULL)) 7554 nw->extra = 1; 7555 7556 break; 7557 } 7558 cur = cur->next; 7559 } 7560 #ifdef LIBXML_LEGACY_ENABLED 7561 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7562 xmlAddEntityReference(ent, firstChild, nw); 7563 #endif /* LIBXML_LEGACY_ENABLED */ 7564 } else if ((list == NULL) || (ctxt->inputNr > 0)) { 7565 xmlNodePtr nw = NULL, cur, next, last, 7566 firstChild = NULL; 7567 7568 /* 7569 * We are copying here, make sure there is no abuse 7570 */ 7571 ctxt->sizeentcopy += ent->length + 5; 7572 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) 7573 return; 7574 7575 /* 7576 * Copy the entity child list and make it the new 7577 * entity child list. The goal is to make sure any 7578 * ID or REF referenced will be the one from the 7579 * document content and not the entity copy. 
7580 */ 7581 cur = ent->children; 7582 ent->children = NULL; 7583 last = ent->last; 7584 ent->last = NULL; 7585 while (cur != NULL) { 7586 next = cur->next; 7587 cur->next = NULL; 7588 cur->parent = NULL; 7589 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7590 if (nw != NULL) { 7591 if (nw->_private == NULL) 7592 nw->_private = cur->_private; 7593 if (firstChild == NULL){ 7594 firstChild = cur; 7595 } 7596 xmlAddChild((xmlNodePtr) ent, nw); 7597 xmlAddChild(ctxt->node, cur); 7598 } 7599 if (cur == last) 7600 break; 7601 cur = next; 7602 } 7603 if (ent->owner == 0) 7604 ent->owner = 1; 7605 #ifdef LIBXML_LEGACY_ENABLED 7606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7607 xmlAddEntityReference(ent, firstChild, nw); 7608 #endif /* LIBXML_LEGACY_ENABLED */ 7609 } else { 7610 const xmlChar *nbktext; 7611 7612 /* 7613 * the name change is to avoid coalescing of the 7614 * node with a possible previous text one which 7615 * would make ent->children a dangling pointer 7616 */ 7617 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7618 -1); 7619 if (ent->children->type == XML_TEXT_NODE) 7620 ent->children->name = nbktext; 7621 if ((ent->last != ent->children) && 7622 (ent->last->type == XML_TEXT_NODE)) 7623 ent->last->name = nbktext; 7624 xmlAddChildList(ctxt->node, ent->children); 7625 } 7626 7627 /* 7628 * This is to avoid a nasty side effect, see 7629 * characters() in SAX.c 7630 */ 7631 ctxt->nodemem = 0; 7632 ctxt->nodelen = 0; 7633 return; 7634 } 7635 } 7636 } 7637 7638 /** 7639 * xmlParseEntityRef: 7640 * @ctxt: an XML parser context 7641 * 7642 * parse ENTITY references declarations 7643 * 7644 * [68] EntityRef ::= '&' Name ';' 7645 * 7646 * [ WFC: Entity Declared ] 7647 * In a document without any DTD, a document with only an internal DTD 7648 * subset which contains no parameter entity references, or a document 7649 * with "standalone='yes'", the Name given in the entity reference 7650 * must match that in an entity declaration, except that 
well-formed 7651 * documents need not declare any of the following entities: amp, lt, 7652 * gt, apos, quot. The declaration of a parameter entity must precede 7653 * any reference to it. Similarly, the declaration of a general entity 7654 * must precede any reference to it which appears in a default value in an 7655 * attribute-list declaration. Note that if entities are declared in the 7656 * external subset or in external parameter entities, a non-validating 7657 * processor is not obligated to read and process their declarations; 7658 * for such documents, the rule that an entity must be declared is a 7659 * well-formedness constraint only if standalone='yes'. 7660 * 7661 * [ WFC: Parsed Entity ] 7662 * An entity reference must not contain the name of an unparsed entity 7663 * 7664 * Returns the xmlEntityPtr if found, or NULL otherwise. 7665 */ 7666 xmlEntityPtr 7667 xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7668 const xmlChar *name; 7669 xmlEntityPtr ent = NULL; 7670 7671 GROW; 7672 if (ctxt->instate == XML_PARSER_EOF) 7673 return(NULL); 7674 7675 if (RAW != '&') 7676 return(NULL); 7677 NEXT; 7678 name = xmlParseName(ctxt); 7679 if (name == NULL) { 7680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7681 "xmlParseEntityRef: no name\n"); 7682 return(NULL); 7683 } 7684 if (RAW != ';') { 7685 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7686 return(NULL); 7687 } 7688 NEXT; 7689 7690 /* 7691 * Predefined entities override any extra definition 7692 */ 7693 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7694 ent = xmlGetPredefinedEntity(name); 7695 if (ent != NULL) 7696 return(ent); 7697 } 7698 7699 /* 7700 * Increase the number of entity references parsed 7701 */ 7702 ctxt->nbentities++; 7703 7704 /* 7705 * Ask first SAX for entity resolution, otherwise try the 7706 * entities which may have stored in the parser context. 
7707 */ 7708 if (ctxt->sax != NULL) { 7709 if (ctxt->sax->getEntity != NULL) 7710 ent = ctxt->sax->getEntity(ctxt->userData, name); 7711 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7712 (ctxt->options & XML_PARSE_OLDSAX)) 7713 ent = xmlGetPredefinedEntity(name); 7714 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7715 (ctxt->userData==ctxt)) { 7716 ent = xmlSAX2GetEntity(ctxt, name); 7717 } 7718 } 7719 if (ctxt->instate == XML_PARSER_EOF) 7720 return(NULL); 7721 /* 7722 * [ WFC: Entity Declared ] 7723 * In a document without any DTD, a document with only an 7724 * internal DTD subset which contains no parameter entity 7725 * references, or a document with "standalone='yes'", the 7726 * Name given in the entity reference must match that in an 7727 * entity declaration, except that well-formed documents 7728 * need not declare any of the following entities: amp, lt, 7729 * gt, apos, quot. 7730 * The declaration of a parameter entity must precede any 7731 * reference to it. 7732 * Similarly, the declaration of a general entity must 7733 * precede any reference to it which appears in a default 7734 * value in an attribute-list declaration. Note that if 7735 * entities are declared in the external subset or in 7736 * external parameter entities, a non-validating processor 7737 * is not obligated to read and process their declarations; 7738 * for such documents, the rule that an entity must be 7739 * declared is a well-formedness constraint only if 7740 * standalone='yes'. 
7741 */ 7742 if (ent == NULL) { 7743 if ((ctxt->standalone == 1) || 7744 ((ctxt->hasExternalSubset == 0) && 7745 (ctxt->hasPErefs == 0))) { 7746 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7747 "Entity '%s' not defined\n", name); 7748 } else { 7749 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7750 "Entity '%s' not defined\n", name); 7751 if ((ctxt->inSubset == 0) && 7752 (ctxt->sax != NULL) && 7753 (ctxt->sax->reference != NULL)) { 7754 ctxt->sax->reference(ctxt->userData, name); 7755 } 7756 } 7757 xmlParserEntityCheck(ctxt, 0, ent, 0); 7758 ctxt->valid = 0; 7759 } 7760 7761 /* 7762 * [ WFC: Parsed Entity ] 7763 * An entity reference must not contain the name of an 7764 * unparsed entity 7765 */ 7766 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7767 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7768 "Entity reference to unparsed entity %s\n", name); 7769 } 7770 7771 /* 7772 * [ WFC: No External Entity References ] 7773 * Attribute values cannot contain direct or indirect 7774 * entity references to external entities. 7775 */ 7776 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7777 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7779 "Attribute references external entity '%s'\n", name); 7780 } 7781 /* 7782 * [ WFC: No < in Attribute Values ] 7783 * The replacement text of any entity referred to directly or 7784 * indirectly in an attribute value (other than "<") must 7785 * not contain a <. 7786 */ 7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7788 (ent != NULL) && 7789 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 7790 if (((ent->checked & 1) || (ent->checked == 0)) && 7791 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) { 7792 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7793 "'<' in entity '%s' is not allowed in attributes values\n", name); 7794 } 7795 } 7796 7797 /* 7798 * Internal check, no parameter entities here ... 
7799 */ 7800 else { 7801 switch (ent->etype) { 7802 case XML_INTERNAL_PARAMETER_ENTITY: 7803 case XML_EXTERNAL_PARAMETER_ENTITY: 7804 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7805 "Attempt to reference the parameter entity '%s'\n", 7806 name); 7807 break; 7808 default: 7809 break; 7810 } 7811 } 7812 7813 /* 7814 * [ WFC: No Recursion ] 7815 * A parsed entity must not contain a recursive reference 7816 * to itself, either directly or indirectly. 7817 * Done somewhere else 7818 */ 7819 return(ent); 7820 } 7821 7822 /** 7823 * xmlParseStringEntityRef: 7824 * @ctxt: an XML parser context 7825 * @str: a pointer to an index in the string 7826 * 7827 * parse ENTITY references declarations, but this version parses it from 7828 * a string value. 7829 * 7830 * [68] EntityRef ::= '&' Name ';' 7831 * 7832 * [ WFC: Entity Declared ] 7833 * In a document without any DTD, a document with only an internal DTD 7834 * subset which contains no parameter entity references, or a document 7835 * with "standalone='yes'", the Name given in the entity reference 7836 * must match that in an entity declaration, except that well-formed 7837 * documents need not declare any of the following entities: amp, lt, 7838 * gt, apos, quot. The declaration of a parameter entity must precede 7839 * any reference to it. Similarly, the declaration of a general entity 7840 * must precede any reference to it which appears in a default value in an 7841 * attribute-list declaration. Note that if entities are declared in the 7842 * external subset or in external parameter entities, a non-validating 7843 * processor is not obligated to read and process their declarations; 7844 * for such documents, the rule that an entity must be declared is a 7845 * well-formedness constraint only if standalone='yes'. 7846 * 7847 * [ WFC: Parsed Entity ] 7848 * An entity reference must not contain the name of an unparsed entity 7849 * 7850 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7851 * is updated to the current location in the string. 7852 */ 7853 static xmlEntityPtr 7854 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7855 xmlChar *name; 7856 const xmlChar *ptr; 7857 xmlChar cur; 7858 xmlEntityPtr ent = NULL; 7859 7860 if ((str == NULL) || (*str == NULL)) 7861 return(NULL); 7862 ptr = *str; 7863 cur = *ptr; 7864 if (cur != '&') 7865 return(NULL); 7866 7867 ptr++; 7868 name = xmlParseStringName(ctxt, &ptr); 7869 if (name == NULL) { 7870 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7871 "xmlParseStringEntityRef: no name\n"); 7872 *str = ptr; 7873 return(NULL); 7874 } 7875 if (*ptr != ';') { 7876 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7877 xmlFree(name); 7878 *str = ptr; 7879 return(NULL); 7880 } 7881 ptr++; 7882 7883 7884 /* 7885 * Predefined entities override any extra definition 7886 */ 7887 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7888 ent = xmlGetPredefinedEntity(name); 7889 if (ent != NULL) { 7890 xmlFree(name); 7891 *str = ptr; 7892 return(ent); 7893 } 7894 } 7895 7896 /* 7897 * Increate the number of entity references parsed 7898 */ 7899 ctxt->nbentities++; 7900 7901 /* 7902 * Ask first SAX for entity resolution, otherwise try the 7903 * entities which may have stored in the parser context. 
7904 */ 7905 if (ctxt->sax != NULL) { 7906 if (ctxt->sax->getEntity != NULL) 7907 ent = ctxt->sax->getEntity(ctxt->userData, name); 7908 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7909 ent = xmlGetPredefinedEntity(name); 7910 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7911 ent = xmlSAX2GetEntity(ctxt, name); 7912 } 7913 } 7914 if (ctxt->instate == XML_PARSER_EOF) { 7915 xmlFree(name); 7916 return(NULL); 7917 } 7918 7919 /* 7920 * [ WFC: Entity Declared ] 7921 * In a document without any DTD, a document with only an 7922 * internal DTD subset which contains no parameter entity 7923 * references, or a document with "standalone='yes'", the 7924 * Name given in the entity reference must match that in an 7925 * entity declaration, except that well-formed documents 7926 * need not declare any of the following entities: amp, lt, 7927 * gt, apos, quot. 7928 * The declaration of a parameter entity must precede any 7929 * reference to it. 7930 * Similarly, the declaration of a general entity must 7931 * precede any reference to it which appears in a default 7932 * value in an attribute-list declaration. Note that if 7933 * entities are declared in the external subset or in 7934 * external parameter entities, a non-validating processor 7935 * is not obligated to read and process their declarations; 7936 * for such documents, the rule that an entity must be 7937 * declared is a well-formedness constraint only if 7938 * standalone='yes'. 7939 */ 7940 if (ent == NULL) { 7941 if ((ctxt->standalone == 1) || 7942 ((ctxt->hasExternalSubset == 0) && 7943 (ctxt->hasPErefs == 0))) { 7944 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7945 "Entity '%s' not defined\n", name); 7946 } else { 7947 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7948 "Entity '%s' not defined\n", 7949 name); 7950 } 7951 xmlParserEntityCheck(ctxt, 0, ent, 0); 7952 /* TODO ? 
check regressions ctxt->valid = 0; */ 7953 } 7954 7955 /* 7956 * [ WFC: Parsed Entity ] 7957 * An entity reference must not contain the name of an 7958 * unparsed entity 7959 */ 7960 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7962 "Entity reference to unparsed entity %s\n", name); 7963 } 7964 7965 /* 7966 * [ WFC: No External Entity References ] 7967 * Attribute values cannot contain direct or indirect 7968 * entity references to external entities. 7969 */ 7970 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7971 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7972 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7973 "Attribute references external entity '%s'\n", name); 7974 } 7975 /* 7976 * [ WFC: No < in Attribute Values ] 7977 * The replacement text of any entity referred to directly or 7978 * indirectly in an attribute value (other than "<") must 7979 * not contain a <. 7980 */ 7981 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7982 (ent != NULL) && (ent->content != NULL) && 7983 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7984 (xmlStrchr(ent->content, '<'))) { 7985 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7986 "'<' in entity '%s' is not allowed in attributes values\n", 7987 name); 7988 } 7989 7990 /* 7991 * Internal check, no parameter entities here ... 7992 */ 7993 else { 7994 switch (ent->etype) { 7995 case XML_INTERNAL_PARAMETER_ENTITY: 7996 case XML_EXTERNAL_PARAMETER_ENTITY: 7997 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7998 "Attempt to reference the parameter entity '%s'\n", 7999 name); 8000 break; 8001 default: 8002 break; 8003 } 8004 } 8005 8006 /* 8007 * [ WFC: No Recursion ] 8008 * A parsed entity must not contain a recursive reference 8009 * to itself, either directly or indirectly. 
8010 * Done somewhere else 8011 */ 8012 8013 xmlFree(name); 8014 *str = ptr; 8015 return(ent); 8016 } 8017 8018 /** 8019 * xmlParsePEReference: 8020 * @ctxt: an XML parser context 8021 * 8022 * parse PEReference declarations 8023 * The entity content is handled directly by pushing it's content as 8024 * a new input stream. 8025 * 8026 * [69] PEReference ::= '%' Name ';' 8027 * 8028 * [ WFC: No Recursion ] 8029 * A parsed entity must not contain a recursive 8030 * reference to itself, either directly or indirectly. 8031 * 8032 * [ WFC: Entity Declared ] 8033 * In a document without any DTD, a document with only an internal DTD 8034 * subset which contains no parameter entity references, or a document 8035 * with "standalone='yes'", ... ... The declaration of a parameter 8036 * entity must precede any reference to it... 8037 * 8038 * [ VC: Entity Declared ] 8039 * In a document with an external subset or external parameter entities 8040 * with "standalone='no'", ... ... The declaration of a parameter entity 8041 * must precede any reference to it... 8042 * 8043 * [ WFC: In DTD ] 8044 * Parameter-entity references may only appear in the DTD. 8045 * NOTE: misleading but this is handled. 
8046 */ 8047 void 8048 xmlParsePEReference(xmlParserCtxtPtr ctxt) 8049 { 8050 const xmlChar *name; 8051 xmlEntityPtr entity = NULL; 8052 xmlParserInputPtr input; 8053 8054 if (RAW != '%') 8055 return; 8056 NEXT; 8057 name = xmlParseName(ctxt); 8058 if (name == NULL) { 8059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8060 "xmlParsePEReference: no name\n"); 8061 return; 8062 } 8063 if (RAW != ';') { 8064 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8065 return; 8066 } 8067 8068 NEXT; 8069 8070 /* 8071 * Increate the number of entity references parsed 8072 */ 8073 ctxt->nbentities++; 8074 8075 /* 8076 * Request the entity from SAX 8077 */ 8078 if ((ctxt->sax != NULL) && 8079 (ctxt->sax->getParameterEntity != NULL)) 8080 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8081 if (ctxt->instate == XML_PARSER_EOF) 8082 return; 8083 if (entity == NULL) { 8084 /* 8085 * [ WFC: Entity Declared ] 8086 * In a document without any DTD, a document with only an 8087 * internal DTD subset which contains no parameter entity 8088 * references, or a document with "standalone='yes'", ... 8089 * ... The declaration of a parameter entity must precede 8090 * any reference to it... 8091 */ 8092 if ((ctxt->standalone == 1) || 8093 ((ctxt->hasExternalSubset == 0) && 8094 (ctxt->hasPErefs == 0))) { 8095 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8096 "PEReference: %%%s; not found\n", 8097 name); 8098 } else { 8099 /* 8100 * [ VC: Entity Declared ] 8101 * In a document with an external subset or external 8102 * parameter entities with "standalone='no'", ... 8103 * ... The declaration of a parameter entity must 8104 * precede any reference to it... 
8105 */ 8106 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8107 "PEReference: %%%s; not found\n", 8108 name, NULL); 8109 ctxt->valid = 0; 8110 } 8111 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8112 } else { 8113 /* 8114 * Internal checking in case the entity quest barfed 8115 */ 8116 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8117 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8119 "Internal: %%%s; is not a parameter entity\n", 8120 name, NULL); 8121 } else if (ctxt->input->free != deallocblankswrapper) { 8122 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 8123 if (xmlPushInput(ctxt, input) < 0) 8124 return; 8125 } else { 8126 /* 8127 * TODO !!! 8128 * handle the extra spaces added before and after 8129 * c.f. http://www.w3.org/TR/REC-xml#as-PE 8130 */ 8131 input = xmlNewEntityInputStream(ctxt, entity); 8132 if (xmlPushInput(ctxt, input) < 0) 8133 return; 8134 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 8135 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 8136 (IS_BLANK_CH(NXT(5)))) { 8137 xmlParseTextDecl(ctxt); 8138 if (ctxt->errNo == 8139 XML_ERR_UNSUPPORTED_ENCODING) { 8140 /* 8141 * The XML REC instructs us to stop parsing 8142 * right here 8143 */ 8144 xmlHaltParser(ctxt); 8145 return; 8146 } 8147 } 8148 } 8149 } 8150 ctxt->hasPErefs = 1; 8151 } 8152 8153 /** 8154 * xmlLoadEntityContent: 8155 * @ctxt: an XML parser context 8156 * @entity: an unloaded system entity 8157 * 8158 * Load the original content of the given system entity from the 8159 * ExternalID/SystemID given. 
This is to be used for Included in Literal 8160 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 8161 * 8162 * Returns 0 in case of success and -1 in case of failure 8163 */ 8164 static int 8165 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 8166 xmlParserInputPtr input; 8167 xmlBufferPtr buf; 8168 int l, c; 8169 int count = 0; 8170 8171 if ((ctxt == NULL) || (entity == NULL) || 8172 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 8173 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 8174 (entity->content != NULL)) { 8175 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8176 "xmlLoadEntityContent parameter error"); 8177 return(-1); 8178 } 8179 8180 if (xmlParserDebugEntities) 8181 xmlGenericError(xmlGenericErrorContext, 8182 "Reading %s entity content input\n", entity->name); 8183 8184 buf = xmlBufferCreate(); 8185 if (buf == NULL) { 8186 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8187 "xmlLoadEntityContent parameter error"); 8188 return(-1); 8189 } 8190 8191 input = xmlNewEntityInputStream(ctxt, entity); 8192 if (input == NULL) { 8193 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8194 "xmlLoadEntityContent input error"); 8195 xmlBufferFree(buf); 8196 return(-1); 8197 } 8198 8199 /* 8200 * Push the entity as the current input, read char by char 8201 * saving to the buffer until the end of the entity or an error 8202 */ 8203 if (xmlPushInput(ctxt, input) < 0) { 8204 xmlBufferFree(buf); 8205 return(-1); 8206 } 8207 8208 GROW; 8209 c = CUR_CHAR(l); 8210 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 8211 (IS_CHAR(c))) { 8212 xmlBufferAdd(buf, ctxt->input->cur, l); 8213 if (count++ > XML_PARSER_CHUNK_SIZE) { 8214 count = 0; 8215 GROW; 8216 if (ctxt->instate == XML_PARSER_EOF) { 8217 xmlBufferFree(buf); 8218 return(-1); 8219 } 8220 } 8221 NEXTL(l); 8222 c = CUR_CHAR(l); 8223 if (c == 0) { 8224 count = 0; 8225 GROW; 8226 if (ctxt->instate == XML_PARSER_EOF) { 8227 xmlBufferFree(buf); 8228 
return(-1); 8229 } 8230 c = CUR_CHAR(l); 8231 } 8232 } 8233 8234 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 8235 xmlPopInput(ctxt); 8236 } else if (!IS_CHAR(c)) { 8237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 8238 "xmlLoadEntityContent: invalid char value %d\n", 8239 c); 8240 xmlBufferFree(buf); 8241 return(-1); 8242 } 8243 entity->content = buf->content; 8244 buf->content = NULL; 8245 xmlBufferFree(buf); 8246 8247 return(0); 8248 } 8249 8250 /** 8251 * xmlParseStringPEReference: 8252 * @ctxt: an XML parser context 8253 * @str: a pointer to an index in the string 8254 * 8255 * parse PEReference declarations 8256 * 8257 * [69] PEReference ::= '%' Name ';' 8258 * 8259 * [ WFC: No Recursion ] 8260 * A parsed entity must not contain a recursive 8261 * reference to itself, either directly or indirectly. 8262 * 8263 * [ WFC: Entity Declared ] 8264 * In a document without any DTD, a document with only an internal DTD 8265 * subset which contains no parameter entity references, or a document 8266 * with "standalone='yes'", ... ... The declaration of a parameter 8267 * entity must precede any reference to it... 8268 * 8269 * [ VC: Entity Declared ] 8270 * In a document with an external subset or external parameter entities 8271 * with "standalone='no'", ... ... The declaration of a parameter entity 8272 * must precede any reference to it... 8273 * 8274 * [ WFC: In DTD ] 8275 * Parameter-entity references may only appear in the DTD. 8276 * NOTE: misleading but this is handled. 8277 * 8278 * Returns the string of the entity content. 
8279 * str is updated to the current value of the index 8280 */ 8281 static xmlEntityPtr 8282 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 8283 const xmlChar *ptr; 8284 xmlChar cur; 8285 xmlChar *name; 8286 xmlEntityPtr entity = NULL; 8287 8288 if ((str == NULL) || (*str == NULL)) return(NULL); 8289 ptr = *str; 8290 cur = *ptr; 8291 if (cur != '%') 8292 return(NULL); 8293 ptr++; 8294 name = xmlParseStringName(ctxt, &ptr); 8295 if (name == NULL) { 8296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8297 "xmlParseStringPEReference: no name\n"); 8298 *str = ptr; 8299 return(NULL); 8300 } 8301 cur = *ptr; 8302 if (cur != ';') { 8303 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 8304 xmlFree(name); 8305 *str = ptr; 8306 return(NULL); 8307 } 8308 ptr++; 8309 8310 /* 8311 * Increate the number of entity references parsed 8312 */ 8313 ctxt->nbentities++; 8314 8315 /* 8316 * Request the entity from SAX 8317 */ 8318 if ((ctxt->sax != NULL) && 8319 (ctxt->sax->getParameterEntity != NULL)) 8320 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 8321 if (ctxt->instate == XML_PARSER_EOF) { 8322 xmlFree(name); 8323 return(NULL); 8324 } 8325 if (entity == NULL) { 8326 /* 8327 * [ WFC: Entity Declared ] 8328 * In a document without any DTD, a document with only an 8329 * internal DTD subset which contains no parameter entity 8330 * references, or a document with "standalone='yes'", ... 8331 * ... The declaration of a parameter entity must precede 8332 * any reference to it... 8333 */ 8334 if ((ctxt->standalone == 1) || 8335 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 8336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 8337 "PEReference: %%%s; not found\n", name); 8338 } else { 8339 /* 8340 * [ VC: Entity Declared ] 8341 * In a document with an external subset or external 8342 * parameter entities with "standalone='no'", ... 8343 * ... 
The declaration of a parameter entity must 8344 * precede any reference to it... 8345 */ 8346 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8347 "PEReference: %%%s; not found\n", 8348 name, NULL); 8349 ctxt->valid = 0; 8350 } 8351 xmlParserEntityCheck(ctxt, 0, NULL, 0); 8352 } else { 8353 /* 8354 * Internal checking in case the entity quest barfed 8355 */ 8356 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 8357 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 8358 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 8359 "%%%s; is not a parameter entity\n", 8360 name, NULL); 8361 } 8362 } 8363 ctxt->hasPErefs = 1; 8364 xmlFree(name); 8365 *str = ptr; 8366 return(entity); 8367 } 8368 8369 /** 8370 * xmlParseDocTypeDecl: 8371 * @ctxt: an XML parser context 8372 * 8373 * parse a DOCTYPE declaration 8374 * 8375 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 8376 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8377 * 8378 * [ VC: Root Element Type ] 8379 * The Name in the document type declaration must match the element 8380 * type of the root element. 8381 */ 8382 8383 void 8384 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 8385 const xmlChar *name = NULL; 8386 xmlChar *ExternalID = NULL; 8387 xmlChar *URI = NULL; 8388 8389 /* 8390 * We know that '<!DOCTYPE' has been detected. 8391 */ 8392 SKIP(9); 8393 8394 SKIP_BLANKS; 8395 8396 /* 8397 * Parse the DOCTYPE name. 8398 */ 8399 name = xmlParseName(ctxt); 8400 if (name == NULL) { 8401 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8402 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 8403 } 8404 ctxt->intSubName = name; 8405 8406 SKIP_BLANKS; 8407 8408 /* 8409 * Check for SystemID and ExternalID 8410 */ 8411 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 8412 8413 if ((URI != NULL) || (ExternalID != NULL)) { 8414 ctxt->hasExternalSubset = 1; 8415 } 8416 ctxt->extSubURI = URI; 8417 ctxt->extSubSystem = ExternalID; 8418 8419 SKIP_BLANKS; 8420 8421 /* 8422 * Create and update the internal subset. 
8423 */ 8424 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 8425 (!ctxt->disableSAX)) 8426 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 8427 if (ctxt->instate == XML_PARSER_EOF) 8428 return; 8429 8430 /* 8431 * Is there any internal subset declarations ? 8432 * they are handled separately in xmlParseInternalSubset() 8433 */ 8434 if (RAW == '[') 8435 return; 8436 8437 /* 8438 * We should be at the end of the DOCTYPE declaration. 8439 */ 8440 if (RAW != '>') { 8441 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8442 } 8443 NEXT; 8444 } 8445 8446 /** 8447 * xmlParseInternalSubset: 8448 * @ctxt: an XML parser context 8449 * 8450 * parse the internal subset declaration 8451 * 8452 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 8453 */ 8454 8455 static void 8456 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 8457 /* 8458 * Is there any DTD definition ? 8459 */ 8460 if (RAW == '[') { 8461 ctxt->instate = XML_PARSER_DTD; 8462 NEXT; 8463 /* 8464 * Parse the succession of Markup declarations and 8465 * PEReferences. 8466 * Subsequence (markupdecl | PEReference | S)* 8467 */ 8468 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) { 8469 const xmlChar *check = CUR_PTR; 8470 unsigned int cons = ctxt->input->consumed; 8471 8472 SKIP_BLANKS; 8473 xmlParseMarkupDecl(ctxt); 8474 xmlParsePEReference(ctxt); 8475 8476 /* 8477 * Pop-up of finished entities. 8478 */ 8479 while ((RAW == 0) && (ctxt->inputNr > 1)) 8480 xmlPopInput(ctxt); 8481 8482 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 8483 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8484 "xmlParseInternalSubset: error detected in Markup declaration\n"); 8485 break; 8486 } 8487 } 8488 if (RAW == ']') { 8489 NEXT; 8490 SKIP_BLANKS; 8491 } 8492 } 8493 8494 /* 8495 * We should be at the end of the DOCTYPE declaration. 
8496 */ 8497 if (RAW != '>') { 8498 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 8499 return; 8500 } 8501 NEXT; 8502 } 8503 8504 #ifdef LIBXML_SAX1_ENABLED 8505 /** 8506 * xmlParseAttribute: 8507 * @ctxt: an XML parser context 8508 * @value: a xmlChar ** used to store the value of the attribute 8509 * 8510 * parse an attribute 8511 * 8512 * [41] Attribute ::= Name Eq AttValue 8513 * 8514 * [ WFC: No External Entity References ] 8515 * Attribute values cannot contain direct or indirect entity references 8516 * to external entities. 8517 * 8518 * [ WFC: No < in Attribute Values ] 8519 * The replacement text of any entity referred to directly or indirectly in 8520 * an attribute value (other than "<") must not contain a <. 8521 * 8522 * [ VC: Attribute Value Type ] 8523 * The attribute must have been declared; the value must be of the type 8524 * declared for it. 8525 * 8526 * [25] Eq ::= S? '=' S? 8527 * 8528 * With namespace: 8529 * 8530 * [NS 11] Attribute ::= QName Eq AttValue 8531 * 8532 * Also the case QName == xmlns:??? is handled independently as a namespace 8533 * definition. 8534 * 8535 * Returns the attribute name, and the value in *value. 
8536 */ 8537 8538 const xmlChar * 8539 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 8540 const xmlChar *name; 8541 xmlChar *val; 8542 8543 *value = NULL; 8544 GROW; 8545 name = xmlParseName(ctxt); 8546 if (name == NULL) { 8547 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8548 "error parsing attribute name\n"); 8549 return(NULL); 8550 } 8551 8552 /* 8553 * read the value 8554 */ 8555 SKIP_BLANKS; 8556 if (RAW == '=') { 8557 NEXT; 8558 SKIP_BLANKS; 8559 val = xmlParseAttValue(ctxt); 8560 ctxt->instate = XML_PARSER_CONTENT; 8561 } else { 8562 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8563 "Specification mandate value for attribute %s\n", name); 8564 return(NULL); 8565 } 8566 8567 /* 8568 * Check that xml:lang conforms to the specification 8569 * No more registered as an error, just generate a warning now 8570 * since this was deprecated in XML second edition 8571 */ 8572 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 8573 if (!xmlCheckLanguageID(val)) { 8574 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8575 "Malformed value for xml:lang : %s\n", 8576 val, NULL); 8577 } 8578 } 8579 8580 /* 8581 * Check that xml:space conforms to the specification 8582 */ 8583 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8584 if (xmlStrEqual(val, BAD_CAST "default")) 8585 *(ctxt->space) = 0; 8586 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8587 *(ctxt->space) = 1; 8588 else { 8589 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8590 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8591 val, NULL); 8592 } 8593 } 8594 8595 *value = val; 8596 return(name); 8597 } 8598 8599 /** 8600 * xmlParseStartTag: 8601 * @ctxt: an XML parser context 8602 * 8603 * parse a start of tag either for rule element or 8604 * EmptyElement. In both case we don't parse the tag closing chars. 8605 * 8606 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8607 * 8608 * [ WFC: Unique Att Spec ] 8609 * No attribute name may appear more than once in the same start-tag or 8610 * empty-element tag. 8611 * 8612 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8613 * 8614 * [ WFC: Unique Att Spec ] 8615 * No attribute name may appear more than once in the same start-tag or 8616 * empty-element tag. 8617 * 8618 * With namespace: 8619 * 8620 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8621 * 8622 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8623 * 8624 * Returns the element name parsed 8625 */ 8626 8627 const xmlChar * 8628 xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8629 const xmlChar *name; 8630 const xmlChar *attname; 8631 xmlChar *attvalue; 8632 const xmlChar **atts = ctxt->atts; 8633 int nbatts = 0; 8634 int maxatts = ctxt->maxatts; 8635 int i; 8636 8637 if (RAW != '<') return(NULL); 8638 NEXT1; 8639 8640 name = xmlParseName(ctxt); 8641 if (name == NULL) { 8642 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8643 "xmlParseStartTag: invalid element name\n"); 8644 return(NULL); 8645 } 8646 8647 /* 8648 * Now parse the attributes, it ends up with the ending 8649 * 8650 * (S Attribute)* S? 8651 */ 8652 SKIP_BLANKS; 8653 GROW; 8654 8655 while (((RAW != '>') && 8656 ((RAW != '/') || (NXT(1) != '>')) && 8657 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 8658 const xmlChar *q = CUR_PTR; 8659 unsigned int cons = ctxt->input->consumed; 8660 8661 attname = xmlParseAttribute(ctxt, &attvalue); 8662 if ((attname != NULL) && (attvalue != NULL)) { 8663 /* 8664 * [ WFC: Unique Att Spec ] 8665 * No attribute name may appear more than once in the same 8666 * start-tag or empty-element tag. 
8667 */ 8668 for (i = 0; i < nbatts;i += 2) { 8669 if (xmlStrEqual(atts[i], attname)) { 8670 xmlErrAttributeDup(ctxt, NULL, attname); 8671 xmlFree(attvalue); 8672 goto failed; 8673 } 8674 } 8675 /* 8676 * Add the pair to atts 8677 */ 8678 if (atts == NULL) { 8679 maxatts = 22; /* allow for 10 attrs by default */ 8680 atts = (const xmlChar **) 8681 xmlMalloc(maxatts * sizeof(xmlChar *)); 8682 if (atts == NULL) { 8683 xmlErrMemory(ctxt, NULL); 8684 if (attvalue != NULL) 8685 xmlFree(attvalue); 8686 goto failed; 8687 } 8688 ctxt->atts = atts; 8689 ctxt->maxatts = maxatts; 8690 } else if (nbatts + 4 > maxatts) { 8691 const xmlChar **n; 8692 8693 maxatts *= 2; 8694 n = (const xmlChar **) xmlRealloc((void *) atts, 8695 maxatts * sizeof(const xmlChar *)); 8696 if (n == NULL) { 8697 xmlErrMemory(ctxt, NULL); 8698 if (attvalue != NULL) 8699 xmlFree(attvalue); 8700 goto failed; 8701 } 8702 atts = n; 8703 ctxt->atts = atts; 8704 ctxt->maxatts = maxatts; 8705 } 8706 atts[nbatts++] = attname; 8707 atts[nbatts++] = attvalue; 8708 atts[nbatts] = NULL; 8709 atts[nbatts + 1] = NULL; 8710 } else { 8711 if (attvalue != NULL) 8712 xmlFree(attvalue); 8713 } 8714 8715 failed: 8716 8717 GROW 8718 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8719 break; 8720 if (!IS_BLANK_CH(RAW)) { 8721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8722 "attributes construct error\n"); 8723 } 8724 SKIP_BLANKS; 8725 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8726 (attname == NULL) && (attvalue == NULL)) { 8727 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8728 "xmlParseStartTag: problem parsing attributes\n"); 8729 break; 8730 } 8731 SHRINK; 8732 GROW; 8733 } 8734 8735 /* 8736 * SAX: Start of Element ! 
8737 */ 8738 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8739 (!ctxt->disableSAX)) { 8740 if (nbatts > 0) 8741 ctxt->sax->startElement(ctxt->userData, name, atts); 8742 else 8743 ctxt->sax->startElement(ctxt->userData, name, NULL); 8744 } 8745 8746 if (atts != NULL) { 8747 /* Free only the content strings */ 8748 for (i = 1;i < nbatts;i+=2) 8749 if (atts[i] != NULL) 8750 xmlFree((xmlChar *) atts[i]); 8751 } 8752 return(name); 8753 } 8754 8755 /** 8756 * xmlParseEndTag1: 8757 * @ctxt: an XML parser context 8758 * @line: line of the start tag 8759 * @nsNr: number of namespaces on the start tag 8760 * 8761 * parse an end of tag 8762 * 8763 * [42] ETag ::= '</' Name S? '>' 8764 * 8765 * With namespace 8766 * 8767 * [NS 9] ETag ::= '</' QName S? '>' 8768 */ 8769 8770 static void 8771 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8772 const xmlChar *name; 8773 8774 GROW; 8775 if ((RAW != '<') || (NXT(1) != '/')) { 8776 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8777 "xmlParseEndTag: '</' not found\n"); 8778 return; 8779 } 8780 SKIP(2); 8781 8782 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8783 8784 /* 8785 * We should definitely be at the ending "S? '>'" part 8786 */ 8787 GROW; 8788 SKIP_BLANKS; 8789 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8790 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8791 } else 8792 NEXT1; 8793 8794 /* 8795 * [ WFC: Element Type Match ] 8796 * The Name in an element's end-tag must match the element type in the 8797 * start-tag. 
8798 * 8799 */ 8800 if (name != (xmlChar*)1) { 8801 if (name == NULL) name = BAD_CAST "unparseable"; 8802 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8803 "Opening and ending tag mismatch: %s line %d and %s\n", 8804 ctxt->name, line, name); 8805 } 8806 8807 /* 8808 * SAX: End of Tag 8809 */ 8810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8811 (!ctxt->disableSAX)) 8812 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8813 8814 namePop(ctxt); 8815 spacePop(ctxt); 8816 return; 8817 } 8818 8819 /** 8820 * xmlParseEndTag: 8821 * @ctxt: an XML parser context 8822 * 8823 * parse an end of tag 8824 * 8825 * [42] ETag ::= '</' Name S? '>' 8826 * 8827 * With namespace 8828 * 8829 * [NS 9] ETag ::= '</' QName S? '>' 8830 */ 8831 8832 void 8833 xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8834 xmlParseEndTag1(ctxt, 0); 8835 } 8836 #endif /* LIBXML_SAX1_ENABLED */ 8837 8838 /************************************************************************ 8839 * * 8840 * SAX 2 specific operations * 8841 * * 8842 ************************************************************************/ 8843 8844 /* 8845 * xmlGetNamespace: 8846 * @ctxt: an XML parser context 8847 * @prefix: the prefix to lookup 8848 * 8849 * Lookup the namespace name for the @prefix (which ca be NULL) 8850 * The prefix must come from the @ctxt->dict dictionary 8851 * 8852 * Returns the namespace name or NULL if not bound 8853 */ 8854 static const xmlChar * 8855 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8856 int i; 8857 8858 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8859 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8860 if (ctxt->nsTab[i] == prefix) { 8861 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8862 return(NULL); 8863 return(ctxt->nsTab[i + 1]); 8864 } 8865 return(NULL); 8866 } 8867 8868 /** 8869 * xmlParseQName: 8870 * @ctxt: an XML parser context 8871 * @prefix: pointer to store the prefix part 8872 * 8873 * parse an XML Namespace QName 8874 * 8875 * 
[6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * *prefix receives the prefix part, or NULL when the name has no prefix,
 * when a malformed QName is returned as a whole, or when nothing could
 * be parsed.
 *
 * Returns the Name parsed or NULL
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *l, *p;

    GROW;

    l = xmlParseNCName(ctxt);
    if (l == NULL) {
        /* Not an NCName: a leading ':' may still form a plain Name. */
        if (CUR == ':') {
            l = xmlParseName(ctxt);
            if (l != NULL) {
                xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                         "Failed to parse QName '%s'\n", l, NULL, NULL);
                *prefix = NULL;
                return(l);
            }
        }
        /* Do not leave the caller's out-parameter indeterminate. */
        *prefix = NULL;
        return(NULL);
    }
    if (CUR == ':') {
        NEXT;
        p = l;
        l = xmlParseNCName(ctxt);
        if (l == NULL) {
            xmlChar *tmp;

            /*
             * "prefix:" not followed by an NCName: report it, then
             * return whatever follows glued to the prefix as one name.
             */
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:'\n", p, NULL, NULL);
            l = xmlParseNmtoken(ctxt);
            if (l == NULL)
                tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
            else {
                tmp = xmlBuildQName(l, p, NULL, 0);
                xmlFree((char *)l);
            }
            p = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = NULL;
            return(p);
        }
        if (CUR == ':') {
            xmlChar *tmp;

            /*
             * A second ':' is an error: keep the first part as the
             * prefix and fold the rest into the local name.
             */
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:%s:'\n", p, l, NULL);
            NEXT;
            tmp = (xmlChar *) xmlParseName(ctxt);
            if (tmp != NULL) {
                tmp = xmlBuildQName(tmp, l, NULL, 0);
                l = xmlDictLookup(ctxt->dict, tmp, -1);
                if (tmp != NULL) xmlFree(tmp);
                *prefix = p;
                return(l);
            }
            tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
            l = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = p;
            return(l);
        }
        *prefix = p;
    } else
        *prefix = NULL;
    return(l);
}

/**
 * xmlParseQNameAndCompare:
 * @ctxt:  an XML parser context
 * @name:  the localname
 * @prefix:  the prefix, if any.
8953 * 8954 * parse an XML name and compares for match 8955 * (specialized for endtag parsing) 8956 * 8957 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8958 * and the name for mismatch 8959 */ 8960 8961 static const xmlChar * 8962 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8963 xmlChar const *prefix) { 8964 const xmlChar *cmp; 8965 const xmlChar *in; 8966 const xmlChar *ret; 8967 const xmlChar *prefix2; 8968 8969 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8970 8971 GROW; 8972 in = ctxt->input->cur; 8973 8974 cmp = prefix; 8975 while (*in != 0 && *in == *cmp) { 8976 ++in; 8977 ++cmp; 8978 } 8979 if ((*cmp == 0) && (*in == ':')) { 8980 in++; 8981 cmp = name; 8982 while (*in != 0 && *in == *cmp) { 8983 ++in; 8984 ++cmp; 8985 } 8986 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8987 /* success */ 8988 ctxt->input->cur = in; 8989 return((const xmlChar*) 1); 8990 } 8991 } 8992 /* 8993 * all strings coms from the dictionary, equality can be done directly 8994 */ 8995 ret = xmlParseQName (ctxt, &prefix2); 8996 if ((ret == name) && (prefix == prefix2)) 8997 return((const xmlChar*) 1); 8998 return ret; 8999 } 9000 9001 /** 9002 * xmlParseAttValueInternal: 9003 * @ctxt: an XML parser context 9004 * @len: attribute len result 9005 * @alloc: whether the attribute was reallocated as a new string 9006 * @normalize: if 1 then further non-CDATA normalization must be done 9007 * 9008 * parse a value for an attribute. 9009 * NOTE: if no normalization is needed, the routine will return pointers 9010 * directly from the data buffer. 
9011 * 9012 * 3.3.3 Attribute-Value Normalization: 9013 * Before the value of an attribute is passed to the application or 9014 * checked for validity, the XML processor must normalize it as follows: 9015 * - a character reference is processed by appending the referenced 9016 * character to the attribute value 9017 * - an entity reference is processed by recursively processing the 9018 * replacement text of the entity 9019 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 9020 * appending #x20 to the normalized value, except that only a single 9021 * #x20 is appended for a "#xD#xA" sequence that is part of an external 9022 * parsed entity or the literal entity value of an internal parsed entity 9023 * - other characters are processed by appending them to the normalized value 9024 * If the declared value is not CDATA, then the XML processor must further 9025 * process the normalized attribute value by discarding any leading and 9026 * trailing space (#x20) characters, and by replacing sequences of space 9027 * (#x20) characters by a single space (#x20) character. 9028 * All attributes for which no declaration has been read should be treated 9029 * by a non-validating parser as if declared CDATA. 9030 * 9031 * Returns the AttValue parsed or NULL. The value has to be freed by the 9032 * caller if it was copied, this can be detected by val[*len] == 0. 
9033 */ 9034 9035 static xmlChar * 9036 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 9037 int normalize) 9038 { 9039 xmlChar limit = 0; 9040 const xmlChar *in = NULL, *start, *end, *last; 9041 xmlChar *ret = NULL; 9042 int line, col; 9043 9044 GROW; 9045 in = (xmlChar *) CUR_PTR; 9046 line = ctxt->input->line; 9047 col = ctxt->input->col; 9048 if (*in != '"' && *in != '\'') { 9049 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 9050 return (NULL); 9051 } 9052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 9053 9054 /* 9055 * try to handle in this routine the most common case where no 9056 * allocation of a new string is required and where content is 9057 * pure ASCII. 9058 */ 9059 limit = *in++; 9060 col++; 9061 end = ctxt->input->end; 9062 start = in; 9063 if (in >= end) { 9064 const xmlChar *oldbase = ctxt->input->base; 9065 GROW; 9066 if (oldbase != ctxt->input->base) { 9067 long delta = ctxt->input->base - oldbase; 9068 start = start + delta; 9069 in = in + delta; 9070 } 9071 end = ctxt->input->end; 9072 } 9073 if (normalize) { 9074 /* 9075 * Skip any leading spaces 9076 */ 9077 while ((in < end) && (*in != limit) && 9078 ((*in == 0x20) || (*in == 0x9) || 9079 (*in == 0xA) || (*in == 0xD))) { 9080 if (*in == 0xA) { 9081 line++; col = 1; 9082 } else { 9083 col++; 9084 } 9085 in++; 9086 start = in; 9087 if (in >= end) { 9088 const xmlChar *oldbase = ctxt->input->base; 9089 GROW; 9090 if (ctxt->instate == XML_PARSER_EOF) 9091 return(NULL); 9092 if (oldbase != ctxt->input->base) { 9093 long delta = ctxt->input->base - oldbase; 9094 start = start + delta; 9095 in = in + delta; 9096 } 9097 end = ctxt->input->end; 9098 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9099 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9101 "AttValue length too long\n"); 9102 return(NULL); 9103 } 9104 } 9105 } 9106 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9107 (*in <= 0x7f) && (*in != '&') && 
(*in != '<')) { 9108 col++; 9109 if ((*in++ == 0x20) && (*in == 0x20)) break; 9110 if (in >= end) { 9111 const xmlChar *oldbase = ctxt->input->base; 9112 GROW; 9113 if (ctxt->instate == XML_PARSER_EOF) 9114 return(NULL); 9115 if (oldbase != ctxt->input->base) { 9116 long delta = ctxt->input->base - oldbase; 9117 start = start + delta; 9118 in = in + delta; 9119 } 9120 end = ctxt->input->end; 9121 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9122 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9123 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9124 "AttValue length too long\n"); 9125 return(NULL); 9126 } 9127 } 9128 } 9129 last = in; 9130 /* 9131 * skip the trailing blanks 9132 */ 9133 while ((last[-1] == 0x20) && (last > start)) last--; 9134 while ((in < end) && (*in != limit) && 9135 ((*in == 0x20) || (*in == 0x9) || 9136 (*in == 0xA) || (*in == 0xD))) { 9137 if (*in == 0xA) { 9138 line++, col = 1; 9139 } else { 9140 col++; 9141 } 9142 in++; 9143 if (in >= end) { 9144 const xmlChar *oldbase = ctxt->input->base; 9145 GROW; 9146 if (ctxt->instate == XML_PARSER_EOF) 9147 return(NULL); 9148 if (oldbase != ctxt->input->base) { 9149 long delta = ctxt->input->base - oldbase; 9150 start = start + delta; 9151 in = in + delta; 9152 last = last + delta; 9153 } 9154 end = ctxt->input->end; 9155 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9156 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9157 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9158 "AttValue length too long\n"); 9159 return(NULL); 9160 } 9161 } 9162 } 9163 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9164 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9166 "AttValue length too long\n"); 9167 return(NULL); 9168 } 9169 if (*in != limit) goto need_complex; 9170 } else { 9171 while ((in < end) && (*in != limit) && (*in >= 0x20) && 9172 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 9173 in++; 9174 col++; 9175 if (in >= end) { 9176 const xmlChar *oldbase = 
ctxt->input->base; 9177 GROW; 9178 if (ctxt->instate == XML_PARSER_EOF) 9179 return(NULL); 9180 if (oldbase != ctxt->input->base) { 9181 long delta = ctxt->input->base - oldbase; 9182 start = start + delta; 9183 in = in + delta; 9184 } 9185 end = ctxt->input->end; 9186 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9187 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9188 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9189 "AttValue length too long\n"); 9190 return(NULL); 9191 } 9192 } 9193 } 9194 last = in; 9195 if (((in - start) > XML_MAX_TEXT_LENGTH) && 9196 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9197 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 9198 "AttValue length too long\n"); 9199 return(NULL); 9200 } 9201 if (*in != limit) goto need_complex; 9202 } 9203 in++; 9204 col++; 9205 if (len != NULL) { 9206 *len = last - start; 9207 ret = (xmlChar *) start; 9208 } else { 9209 if (alloc) *alloc = 1; 9210 ret = xmlStrndup(start, last - start); 9211 } 9212 CUR_PTR = in; 9213 ctxt->input->line = line; 9214 ctxt->input->col = col; 9215 if (alloc) *alloc = 0; 9216 return ret; 9217 need_complex: 9218 if (alloc) *alloc = 1; 9219 return xmlParseAttValueComplex(ctxt, len, normalize); 9220 } 9221 9222 /** 9223 * xmlParseAttribute2: 9224 * @ctxt: an XML parser context 9225 * @pref: the element prefix 9226 * @elem: the element name 9227 * @prefix: a xmlChar ** used to store the value of the attribute prefix 9228 * @value: a xmlChar ** used to store the value of the attribute 9229 * @len: an int * to save the length of the attribute 9230 * @alloc: an int * to indicate if the attribute was allocated 9231 * 9232 * parse an attribute in the new SAX2 framework. 9233 * 9234 * Returns the attribute name, and the value in *value, . 
9235 */ 9236 9237 static const xmlChar * 9238 xmlParseAttribute2(xmlParserCtxtPtr ctxt, 9239 const xmlChar * pref, const xmlChar * elem, 9240 const xmlChar ** prefix, xmlChar ** value, 9241 int *len, int *alloc) 9242 { 9243 const xmlChar *name; 9244 xmlChar *val, *internal_val = NULL; 9245 int normalize = 0; 9246 9247 *value = NULL; 9248 GROW; 9249 name = xmlParseQName(ctxt, prefix); 9250 if (name == NULL) { 9251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9252 "error parsing attribute name\n"); 9253 return (NULL); 9254 } 9255 9256 /* 9257 * get the type if needed 9258 */ 9259 if (ctxt->attsSpecial != NULL) { 9260 int type; 9261 9262 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 9263 pref, elem, *prefix, name); 9264 if (type != 0) 9265 normalize = 1; 9266 } 9267 9268 /* 9269 * read the value 9270 */ 9271 SKIP_BLANKS; 9272 if (RAW == '=') { 9273 NEXT; 9274 SKIP_BLANKS; 9275 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 9276 if (normalize) { 9277 /* 9278 * Sometimes a second normalisation pass for spaces is needed 9279 * but that only happens if charrefs or entities refernces 9280 * have been used in the attribute value, i.e. the attribute 9281 * value have been extracted in an allocated string already. 
9282 */ 9283 if (*alloc) { 9284 const xmlChar *val2; 9285 9286 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 9287 if ((val2 != NULL) && (val2 != val)) { 9288 xmlFree(val); 9289 val = (xmlChar *) val2; 9290 } 9291 } 9292 } 9293 ctxt->instate = XML_PARSER_CONTENT; 9294 } else { 9295 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 9296 "Specification mandate value for attribute %s\n", 9297 name); 9298 return (NULL); 9299 } 9300 9301 if (*prefix == ctxt->str_xml) { 9302 /* 9303 * Check that xml:lang conforms to the specification 9304 * No more registered as an error, just generate a warning now 9305 * since this was deprecated in XML second edition 9306 */ 9307 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 9308 internal_val = xmlStrndup(val, *len); 9309 if (!xmlCheckLanguageID(internal_val)) { 9310 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 9311 "Malformed value for xml:lang : %s\n", 9312 internal_val, NULL); 9313 } 9314 } 9315 9316 /* 9317 * Check that xml:space conforms to the specification 9318 */ 9319 if (xmlStrEqual(name, BAD_CAST "space")) { 9320 internal_val = xmlStrndup(val, *len); 9321 if (xmlStrEqual(internal_val, BAD_CAST "default")) 9322 *(ctxt->space) = 0; 9323 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 9324 *(ctxt->space) = 1; 9325 else { 9326 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 9327 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 9328 internal_val, NULL); 9329 } 9330 } 9331 if (internal_val) { 9332 xmlFree(internal_val); 9333 } 9334 } 9335 9336 *value = val; 9337 return (name); 9338 } 9339 /** 9340 * xmlParseStartTag2: 9341 * @ctxt: an XML parser context 9342 * 9343 * parse a start of tag either for rule element or 9344 * EmptyElement. In both case we don't parse the tag closing chars. 9345 * This routine is called when running SAX2 parsing 9346 * 9347 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 9348 * 9349 * [ WFC: Unique Att Spec ] 9350 * No attribute name may appear more than once in the same start-tag or 9351 * empty-element tag. 9352 * 9353 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 9354 * 9355 * [ WFC: Unique Att Spec ] 9356 * No attribute name may appear more than once in the same start-tag or 9357 * empty-element tag. 9358 * 9359 * With namespace: 9360 * 9361 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 9362 * 9363 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 9364 * 9365 * Returns the element name parsed 9366 */ 9367 9368 static const xmlChar * 9369 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 9370 const xmlChar **URI, int *tlen) { 9371 const xmlChar *localname; 9372 const xmlChar *prefix; 9373 const xmlChar *attname; 9374 const xmlChar *aprefix; 9375 const xmlChar *nsname; 9376 xmlChar *attvalue; 9377 const xmlChar **atts = ctxt->atts; 9378 int maxatts = ctxt->maxatts; 9379 int nratts, nbatts, nbdef; 9380 int i, j, nbNs, attval, oldline, oldcol, inputNr; 9381 const xmlChar *base; 9382 unsigned long cur; 9383 int nsNr = ctxt->nsNr; 9384 9385 if (RAW != '<') return(NULL); 9386 NEXT1; 9387 9388 /* 9389 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 9390 * point since the attribute values may be stored as pointers to 9391 * the buffer and calling SHRINK would destroy them ! 9392 * The Shrinking is only possible once the full set of attribute 9393 * callbacks have been done. 9394 */ 9395 reparse: 9396 SHRINK; 9397 base = ctxt->input->base; 9398 cur = ctxt->input->cur - ctxt->input->base; 9399 inputNr = ctxt->inputNr; 9400 oldline = ctxt->input->line; 9401 oldcol = ctxt->input->col; 9402 nbatts = 0; 9403 nratts = 0; 9404 nbdef = 0; 9405 nbNs = 0; 9406 attval = 0; 9407 /* Forget any namespaces added during an earlier parse of this element. 
*/ 9408 ctxt->nsNr = nsNr; 9409 9410 localname = xmlParseQName(ctxt, &prefix); 9411 if (localname == NULL) { 9412 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 9413 "StartTag: invalid element name\n"); 9414 return(NULL); 9415 } 9416 *tlen = ctxt->input->cur - ctxt->input->base - cur; 9417 9418 /* 9419 * Now parse the attributes, it ends up with the ending 9420 * 9421 * (S Attribute)* S? 9422 */ 9423 SKIP_BLANKS; 9424 GROW; 9425 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9426 goto base_changed; 9427 9428 while (((RAW != '>') && 9429 ((RAW != '/') || (NXT(1) != '>')) && 9430 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) { 9431 const xmlChar *q = CUR_PTR; 9432 unsigned int cons = ctxt->input->consumed; 9433 int len = -1, alloc = 0; 9434 9435 attname = xmlParseAttribute2(ctxt, prefix, localname, 9436 &aprefix, &attvalue, &len, &alloc); 9437 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) { 9438 if ((attvalue != NULL) && (alloc != 0)) 9439 xmlFree(attvalue); 9440 attvalue = NULL; 9441 goto base_changed; 9442 } 9443 if ((attname != NULL) && (attvalue != NULL)) { 9444 if (len < 0) len = xmlStrlen(attvalue); 9445 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9446 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9447 xmlURIPtr uri; 9448 9449 if (URL == NULL) { 9450 xmlErrMemory(ctxt, "dictionary allocation failure"); 9451 if ((attvalue != NULL) && (alloc != 0)) 9452 xmlFree(attvalue); 9453 return(NULL); 9454 } 9455 if (*URL != 0) { 9456 uri = xmlParseURI((const char *) URL); 9457 if (uri == NULL) { 9458 xmlNsErr(ctxt, XML_WAR_NS_URI, 9459 "xmlns: '%s' is not a valid URI\n", 9460 URL, NULL, NULL); 9461 } else { 9462 if (uri->scheme == NULL) { 9463 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9464 "xmlns: URI %s is not absolute\n", 9465 URL, NULL, NULL); 9466 } 9467 xmlFreeURI(uri); 9468 } 9469 if (URL == ctxt->str_xml_ns) { 9470 if (attname != ctxt->str_xml) { 9471 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 
9472 "xml namespace URI cannot be the default namespace\n", 9473 NULL, NULL, NULL); 9474 } 9475 goto skip_default_ns; 9476 } 9477 if ((len == 29) && 9478 (xmlStrEqual(URL, 9479 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9480 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9481 "reuse of the xmlns namespace name is forbidden\n", 9482 NULL, NULL, NULL); 9483 goto skip_default_ns; 9484 } 9485 } 9486 /* 9487 * check that it's not a defined namespace 9488 */ 9489 for (j = 1;j <= nbNs;j++) 9490 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9491 break; 9492 if (j <= nbNs) 9493 xmlErrAttributeDup(ctxt, NULL, attname); 9494 else 9495 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 9496 skip_default_ns: 9497 if ((attvalue != NULL) && (alloc != 0)) { 9498 xmlFree(attvalue); 9499 attvalue = NULL; 9500 } 9501 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9502 break; 9503 if (!IS_BLANK_CH(RAW)) { 9504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9505 "attributes construct error\n"); 9506 break; 9507 } 9508 SKIP_BLANKS; 9509 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9510 goto base_changed; 9511 continue; 9512 } 9513 if (aprefix == ctxt->str_xmlns) { 9514 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 9515 xmlURIPtr uri; 9516 9517 if (attname == ctxt->str_xml) { 9518 if (URL != ctxt->str_xml_ns) { 9519 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9520 "xml namespace prefix mapped to wrong URI\n", 9521 NULL, NULL, NULL); 9522 } 9523 /* 9524 * Do not keep a namespace definition node 9525 */ 9526 goto skip_ns; 9527 } 9528 if (URL == ctxt->str_xml_ns) { 9529 if (attname != ctxt->str_xml) { 9530 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9531 "xml namespace URI mapped to wrong prefix\n", 9532 NULL, NULL, NULL); 9533 } 9534 goto skip_ns; 9535 } 9536 if (attname == ctxt->str_xmlns) { 9537 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9538 "redefinition of the xmlns prefix is forbidden\n", 9539 NULL, NULL, NULL); 9540 goto skip_ns; 9541 } 9542 if ((len == 29) && 
9543 (xmlStrEqual(URL, 9544 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9546 "reuse of the xmlns namespace name is forbidden\n", 9547 NULL, NULL, NULL); 9548 goto skip_ns; 9549 } 9550 if ((URL == NULL) || (URL[0] == 0)) { 9551 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 9552 "xmlns:%s: Empty XML namespace is not allowed\n", 9553 attname, NULL, NULL); 9554 goto skip_ns; 9555 } else { 9556 uri = xmlParseURI((const char *) URL); 9557 if (uri == NULL) { 9558 xmlNsErr(ctxt, XML_WAR_NS_URI, 9559 "xmlns:%s: '%s' is not a valid URI\n", 9560 attname, URL, NULL); 9561 } else { 9562 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 9563 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 9564 "xmlns:%s: URI %s is not absolute\n", 9565 attname, URL, NULL); 9566 } 9567 xmlFreeURI(uri); 9568 } 9569 } 9570 9571 /* 9572 * check that it's not a defined namespace 9573 */ 9574 for (j = 1;j <= nbNs;j++) 9575 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9576 break; 9577 if (j <= nbNs) 9578 xmlErrAttributeDup(ctxt, aprefix, attname); 9579 else 9580 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 9581 skip_ns: 9582 if ((attvalue != NULL) && (alloc != 0)) { 9583 xmlFree(attvalue); 9584 attvalue = NULL; 9585 } 9586 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9587 break; 9588 if (!IS_BLANK_CH(RAW)) { 9589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9590 "attributes construct error\n"); 9591 break; 9592 } 9593 SKIP_BLANKS; 9594 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9595 goto base_changed; 9596 continue; 9597 } 9598 9599 /* 9600 * Add the pair to atts 9601 */ 9602 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9603 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9604 if (attvalue[len] == 0) 9605 xmlFree(attvalue); 9606 goto failed; 9607 } 9608 maxatts = ctxt->maxatts; 9609 atts = ctxt->atts; 9610 } 9611 ctxt->attallocs[nratts++] = alloc; 9612 atts[nbatts++] = attname; 9613 atts[nbatts++] = aprefix; 9614 atts[nbatts++] = 
NULL; /* the URI will be fetched later */ 9615 atts[nbatts++] = attvalue; 9616 attvalue += len; 9617 atts[nbatts++] = attvalue; 9618 /* 9619 * tag if some deallocation is needed 9620 */ 9621 if (alloc != 0) attval = 1; 9622 } else { 9623 if ((attvalue != NULL) && (attvalue[len] == 0)) 9624 xmlFree(attvalue); 9625 } 9626 9627 failed: 9628 9629 GROW 9630 if (ctxt->instate == XML_PARSER_EOF) 9631 break; 9632 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9633 goto base_changed; 9634 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 9635 break; 9636 if (!IS_BLANK_CH(RAW)) { 9637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9638 "attributes construct error\n"); 9639 break; 9640 } 9641 SKIP_BLANKS; 9642 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 9643 (attname == NULL) && (attvalue == NULL)) { 9644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9645 "xmlParseStartTag: problem parsing attributes\n"); 9646 break; 9647 } 9648 GROW; 9649 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) 9650 goto base_changed; 9651 } 9652 9653 /* 9654 * The attributes defaulting 9655 */ 9656 if (ctxt->attsDefault != NULL) { 9657 xmlDefAttrsPtr defaults; 9658 9659 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 9660 if (defaults != NULL) { 9661 for (i = 0;i < defaults->nbAttrs;i++) { 9662 attname = defaults->values[5 * i]; 9663 aprefix = defaults->values[5 * i + 1]; 9664 9665 /* 9666 * special work for namespaces defaulted defs 9667 */ 9668 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 9669 /* 9670 * check that it's not a defined namespace 9671 */ 9672 for (j = 1;j <= nbNs;j++) 9673 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 9674 break; 9675 if (j <= nbNs) continue; 9676 9677 nsname = xmlGetNamespace(ctxt, NULL); 9678 if (nsname != defaults->values[5 * i + 2]) { 9679 if (nsPush(ctxt, NULL, 9680 defaults->values[5 * i + 2]) > 0) 9681 nbNs++; 9682 } 9683 } else if (aprefix == ctxt->str_xmlns) { 9684 /* 9685 * check that 
it's not a defined namespace 9686 */ 9687 for (j = 1;j <= nbNs;j++) 9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9689 break; 9690 if (j <= nbNs) continue; 9691 9692 nsname = xmlGetNamespace(ctxt, attname); 9693 if (nsname != defaults->values[2]) { 9694 if (nsPush(ctxt, attname, 9695 defaults->values[5 * i + 2]) > 0) 9696 nbNs++; 9697 } 9698 } else { 9699 /* 9700 * check that it's not a defined attribute 9701 */ 9702 for (j = 0;j < nbatts;j+=5) { 9703 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9704 break; 9705 } 9706 if (j < nbatts) continue; 9707 9708 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9709 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9710 return(NULL); 9711 } 9712 maxatts = ctxt->maxatts; 9713 atts = ctxt->atts; 9714 } 9715 atts[nbatts++] = attname; 9716 atts[nbatts++] = aprefix; 9717 if (aprefix == NULL) 9718 atts[nbatts++] = NULL; 9719 else 9720 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9721 atts[nbatts++] = defaults->values[5 * i + 2]; 9722 atts[nbatts++] = defaults->values[5 * i + 3]; 9723 if ((ctxt->standalone == 1) && 9724 (defaults->values[5 * i + 4] != NULL)) { 9725 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9726 "standalone: attribute %s on %s defaulted from external subset\n", 9727 attname, localname); 9728 } 9729 nbdef++; 9730 } 9731 } 9732 } 9733 } 9734 9735 /* 9736 * The attributes checkings 9737 */ 9738 for (i = 0; i < nbatts;i += 5) { 9739 /* 9740 * The default namespace does not apply to attribute names. 9741 */ 9742 if (atts[i + 1] != NULL) { 9743 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9744 if (nsname == NULL) { 9745 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9746 "Namespace prefix %s for %s on %s is not defined\n", 9747 atts[i + 1], atts[i], localname); 9748 } 9749 atts[i + 2] = nsname; 9750 } else 9751 nsname = NULL; 9752 /* 9753 * [ WFC: Unique Att Spec ] 9754 * No attribute name may appear more than once in the same 9755 * start-tag or empty-element tag. 
9756 * As extended by the Namespace in XML REC. 9757 */ 9758 for (j = 0; j < i;j += 5) { 9759 if (atts[i] == atts[j]) { 9760 if (atts[i+1] == atts[j+1]) { 9761 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9762 break; 9763 } 9764 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9765 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9766 "Namespaced Attribute %s in '%s' redefined\n", 9767 atts[i], nsname, NULL); 9768 break; 9769 } 9770 } 9771 } 9772 } 9773 9774 nsname = xmlGetNamespace(ctxt, prefix); 9775 if ((prefix != NULL) && (nsname == NULL)) { 9776 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9777 "Namespace prefix %s on %s is not defined\n", 9778 prefix, localname, NULL); 9779 } 9780 *pref = prefix; 9781 *URI = nsname; 9782 9783 /* 9784 * SAX: Start of Element ! 9785 */ 9786 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9787 (!ctxt->disableSAX)) { 9788 if (nbNs > 0) 9789 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9790 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9791 nbatts / 5, nbdef, atts); 9792 else 9793 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9794 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9795 } 9796 9797 /* 9798 * Free up attribute allocated strings if needed 9799 */ 9800 if (attval != 0) { 9801 for (i = 3,j = 0; j < nratts;i += 5,j++) 9802 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9803 xmlFree((xmlChar *) atts[i]); 9804 } 9805 9806 return(localname); 9807 9808 base_changed: 9809 /* 9810 * the attribute strings are valid iif the base didn't changed 9811 */ 9812 if (attval != 0) { 9813 for (i = 3,j = 0; j < nratts;i += 5,j++) 9814 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9815 xmlFree((xmlChar *) atts[i]); 9816 } 9817 9818 /* 9819 * We can't switch from one entity to another in the middle 9820 * of a start tag 9821 */ 9822 if (inputNr != ctxt->inputNr) { 9823 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 9824 "Start tag doesn't start and stop in the same entity\n"); 9825 
return(NULL); 9826 } 9827 9828 ctxt->input->cur = ctxt->input->base + cur; 9829 ctxt->input->line = oldline; 9830 ctxt->input->col = oldcol; 9831 if (ctxt->wellFormed == 1) { 9832 goto reparse; 9833 } 9834 return(NULL); 9835 } 9836 9837 /** 9838 * xmlParseEndTag2: 9839 * @ctxt: an XML parser context 9840 * @line: line of the start tag 9841 * @nsNr: number of namespaces on the start tag 9842 * 9843 * parse an end of tag 9844 * 9845 * [42] ETag ::= '</' Name S? '>' 9846 * 9847 * With namespace 9848 * 9849 * [NS 9] ETag ::= '</' QName S? '>' 9850 */ 9851 9852 static void 9853 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9854 const xmlChar *URI, int line, int nsNr, int tlen) { 9855 const xmlChar *name; 9856 size_t curLength; 9857 9858 GROW; 9859 if ((RAW != '<') || (NXT(1) != '/')) { 9860 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9861 return; 9862 } 9863 SKIP(2); 9864 9865 curLength = ctxt->input->end - ctxt->input->cur; 9866 if ((tlen > 0) && (curLength >= (size_t)tlen) && 9867 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9868 if ((curLength >= (size_t)(tlen + 1)) && 9869 (ctxt->input->cur[tlen] == '>')) { 9870 ctxt->input->cur += tlen + 1; 9871 ctxt->input->col += tlen + 1; 9872 goto done; 9873 } 9874 ctxt->input->cur += tlen; 9875 ctxt->input->col += tlen; 9876 name = (xmlChar*)1; 9877 } else { 9878 if (prefix == NULL) 9879 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9880 else 9881 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9882 } 9883 9884 /* 9885 * We should definitely be at the ending "S? '>'" part 9886 */ 9887 GROW; 9888 if (ctxt->instate == XML_PARSER_EOF) 9889 return; 9890 SKIP_BLANKS; 9891 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9892 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9893 } else 9894 NEXT1; 9895 9896 /* 9897 * [ WFC: Element Type Match ] 9898 * The Name in an element's end-tag must match the element type in the 9899 * start-tag. 
9900 * 9901 */ 9902 if (name != (xmlChar*)1) { 9903 if (name == NULL) name = BAD_CAST "unparseable"; 9904 if ((line == 0) && (ctxt->node != NULL)) 9905 line = ctxt->node->line; 9906 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9907 "Opening and ending tag mismatch: %s line %d and %s\n", 9908 ctxt->name, line, name); 9909 } 9910 9911 /* 9912 * SAX: End of Tag 9913 */ 9914 done: 9915 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9916 (!ctxt->disableSAX)) 9917 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9918 9919 spacePop(ctxt); 9920 if (nsNr != 0) 9921 nsPop(ctxt, nsNr); 9922 return; 9923 } 9924 9925 /** 9926 * xmlParseCDSect: 9927 * @ctxt: an XML parser context 9928 * 9929 * Parse escaped pure raw content. 9930 * 9931 * [18] CDSect ::= CDStart CData CDEnd 9932 * 9933 * [19] CDStart ::= '<![CDATA[' 9934 * 9935 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9936 * 9937 * [21] CDEnd ::= ']]>' 9938 */ 9939 void 9940 xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9941 xmlChar *buf = NULL; 9942 int len = 0; 9943 int size = XML_PARSER_BUFFER_SIZE; 9944 int r, rl; 9945 int s, sl; 9946 int cur, l; 9947 int count = 0; 9948 9949 /* Check 2.6.0 was NXT(0) not RAW */ 9950 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9951 SKIP(9); 9952 } else 9953 return; 9954 9955 ctxt->instate = XML_PARSER_CDATA_SECTION; 9956 r = CUR_CHAR(rl); 9957 if (!IS_CHAR(r)) { 9958 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9959 ctxt->instate = XML_PARSER_CONTENT; 9960 return; 9961 } 9962 NEXTL(rl); 9963 s = CUR_CHAR(sl); 9964 if (!IS_CHAR(s)) { 9965 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9966 ctxt->instate = XML_PARSER_CONTENT; 9967 return; 9968 } 9969 NEXTL(sl); 9970 cur = CUR_CHAR(l); 9971 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9972 if (buf == NULL) { 9973 xmlErrMemory(ctxt, NULL); 9974 return; 9975 } 9976 while (IS_CHAR(cur) && 9977 ((r != ']') || (s != ']') || (cur != '>'))) { 9978 if (len + 
5 >= size) { 9979 xmlChar *tmp; 9980 9981 if ((size > XML_MAX_TEXT_LENGTH) && 9982 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9983 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9984 "CData section too big found", NULL); 9985 xmlFree (buf); 9986 return; 9987 } 9988 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar)); 9989 if (tmp == NULL) { 9990 xmlFree(buf); 9991 xmlErrMemory(ctxt, NULL); 9992 return; 9993 } 9994 buf = tmp; 9995 size *= 2; 9996 } 9997 COPY_BUF(rl,buf,len,r); 9998 r = s; 9999 rl = sl; 10000 s = cur; 10001 sl = l; 10002 count++; 10003 if (count > 50) { 10004 GROW; 10005 if (ctxt->instate == XML_PARSER_EOF) { 10006 xmlFree(buf); 10007 return; 10008 } 10009 count = 0; 10010 } 10011 NEXTL(l); 10012 cur = CUR_CHAR(l); 10013 } 10014 buf[len] = 0; 10015 ctxt->instate = XML_PARSER_CONTENT; 10016 if (cur != '>') { 10017 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 10018 "CData section not finished\n%.50s\n", buf); 10019 xmlFree(buf); 10020 return; 10021 } 10022 NEXTL(l); 10023 10024 /* 10025 * OK the buffer is to be consumed as cdata. 10026 */ 10027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10028 if (ctxt->sax->cdataBlock != NULL) 10029 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 10030 else if (ctxt->sax->characters != NULL) 10031 ctxt->sax->characters(ctxt->userData, buf, len); 10032 } 10033 xmlFree(buf); 10034 } 10035 10036 /** 10037 * xmlParseContent: 10038 * @ctxt: an XML parser context 10039 * 10040 * Parse a content: 10041 * 10042 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 10043 */ 10044 10045 void 10046 xmlParseContent(xmlParserCtxtPtr ctxt) { 10047 GROW; 10048 while ((RAW != 0) && 10049 ((RAW != '<') || (NXT(1) != '/')) && 10050 (ctxt->instate != XML_PARSER_EOF)) { 10051 const xmlChar *test = CUR_PTR; 10052 unsigned int cons = ctxt->input->consumed; 10053 const xmlChar *cur = ctxt->input->cur; 10054 10055 /* 10056 * First case : a Processing Instruction. 
10057 */ 10058 if ((*cur == '<') && (cur[1] == '?')) { 10059 xmlParsePI(ctxt); 10060 } 10061 10062 /* 10063 * Second case : a CDSection 10064 */ 10065 /* 2.6.0 test was *cur not RAW */ 10066 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 10067 xmlParseCDSect(ctxt); 10068 } 10069 10070 /* 10071 * Third case : a comment 10072 */ 10073 else if ((*cur == '<') && (NXT(1) == '!') && 10074 (NXT(2) == '-') && (NXT(3) == '-')) { 10075 xmlParseComment(ctxt); 10076 ctxt->instate = XML_PARSER_CONTENT; 10077 } 10078 10079 /* 10080 * Fourth case : a sub-element. 10081 */ 10082 else if (*cur == '<') { 10083 xmlParseElement(ctxt); 10084 } 10085 10086 /* 10087 * Fifth case : a reference. If if has not been resolved, 10088 * parsing returns it's Name, create the node 10089 */ 10090 10091 else if (*cur == '&') { 10092 xmlParseReference(ctxt); 10093 } 10094 10095 /* 10096 * Last case, text. Note that References are handled directly. 10097 */ 10098 else { 10099 xmlParseCharData(ctxt, 0); 10100 } 10101 10102 GROW; 10103 /* 10104 * Pop-up of finished entities. 10105 */ 10106 while ((RAW == 0) && (ctxt->inputNr > 1)) 10107 xmlPopInput(ctxt); 10108 SHRINK; 10109 10110 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10112 "detected an error in element content\n"); 10113 xmlHaltParser(ctxt); 10114 break; 10115 } 10116 } 10117 } 10118 10119 /** 10120 * xmlParseElement: 10121 * @ctxt: an XML parser context 10122 * 10123 * parse an XML element, this is highly recursive 10124 * 10125 * [39] element ::= EmptyElemTag | STag content ETag 10126 * 10127 * [ WFC: Element Type Match ] 10128 * The Name in an element's end-tag must match the element type in the 10129 * start-tag. 
10130 * 10131 */ 10132 10133 void 10134 xmlParseElement(xmlParserCtxtPtr ctxt) { 10135 const xmlChar *name; 10136 const xmlChar *prefix = NULL; 10137 const xmlChar *URI = NULL; 10138 xmlParserNodeInfo node_info; 10139 int line, tlen = 0; 10140 xmlNodePtr ret; 10141 int nsNr = ctxt->nsNr; 10142 10143 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 10144 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 10145 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 10146 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 10147 xmlParserMaxDepth); 10148 xmlHaltParser(ctxt); 10149 return; 10150 } 10151 10152 /* Capture start position */ 10153 if (ctxt->record_info) { 10154 node_info.begin_pos = ctxt->input->consumed + 10155 (CUR_PTR - ctxt->input->base); 10156 node_info.begin_line = ctxt->input->line; 10157 } 10158 10159 if (ctxt->spaceNr == 0) 10160 spacePush(ctxt, -1); 10161 else if (*ctxt->space == -2) 10162 spacePush(ctxt, -1); 10163 else 10164 spacePush(ctxt, *ctxt->space); 10165 10166 line = ctxt->input->line; 10167 #ifdef LIBXML_SAX1_ENABLED 10168 if (ctxt->sax2) 10169 #endif /* LIBXML_SAX1_ENABLED */ 10170 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10171 #ifdef LIBXML_SAX1_ENABLED 10172 else 10173 name = xmlParseStartTag(ctxt); 10174 #endif /* LIBXML_SAX1_ENABLED */ 10175 if (ctxt->instate == XML_PARSER_EOF) 10176 return; 10177 if (name == NULL) { 10178 spacePop(ctxt); 10179 return; 10180 } 10181 namePush(ctxt, name); 10182 ret = ctxt->node; 10183 10184 #ifdef LIBXML_VALID_ENABLED 10185 /* 10186 * [ VC: Root Element Type ] 10187 * The Name in the document type declaration must match the element 10188 * type of the root element. 10189 */ 10190 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10191 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10192 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10193 #endif /* LIBXML_VALID_ENABLED */ 10194 10195 /* 10196 * Check for an Empty Element. 
10197 */ 10198 if ((RAW == '/') && (NXT(1) == '>')) { 10199 SKIP(2); 10200 if (ctxt->sax2) { 10201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 10202 (!ctxt->disableSAX)) 10203 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 10204 #ifdef LIBXML_SAX1_ENABLED 10205 } else { 10206 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 10207 (!ctxt->disableSAX)) 10208 ctxt->sax->endElement(ctxt->userData, name); 10209 #endif /* LIBXML_SAX1_ENABLED */ 10210 } 10211 namePop(ctxt); 10212 spacePop(ctxt); 10213 if (nsNr != ctxt->nsNr) 10214 nsPop(ctxt, ctxt->nsNr - nsNr); 10215 if ( ret != NULL && ctxt->record_info ) { 10216 node_info.end_pos = ctxt->input->consumed + 10217 (CUR_PTR - ctxt->input->base); 10218 node_info.end_line = ctxt->input->line; 10219 node_info.node = ret; 10220 xmlParserAddNodeInfo(ctxt, &node_info); 10221 } 10222 return; 10223 } 10224 if (RAW == '>') { 10225 NEXT1; 10226 } else { 10227 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 10228 "Couldn't find end of Start Tag %s line %d\n", 10229 name, line, NULL); 10230 10231 /* 10232 * end of parsing of this node. 10233 */ 10234 nodePop(ctxt); 10235 namePop(ctxt); 10236 spacePop(ctxt); 10237 if (nsNr != ctxt->nsNr) 10238 nsPop(ctxt, ctxt->nsNr - nsNr); 10239 10240 /* 10241 * Capture end position and add node 10242 */ 10243 if ( ret != NULL && ctxt->record_info ) { 10244 node_info.end_pos = ctxt->input->consumed + 10245 (CUR_PTR - ctxt->input->base); 10246 node_info.end_line = ctxt->input->line; 10247 node_info.node = ret; 10248 xmlParserAddNodeInfo(ctxt, &node_info); 10249 } 10250 return; 10251 } 10252 10253 /* 10254 * Parse the content of the element: 10255 */ 10256 xmlParseContent(ctxt); 10257 if (ctxt->instate == XML_PARSER_EOF) 10258 return; 10259 if (!IS_BYTE_CHAR(RAW)) { 10260 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 10261 "Premature end of data in tag %s line %d\n", 10262 name, line, NULL); 10263 10264 /* 10265 * end of parsing of this node. 
10266 */ 10267 nodePop(ctxt); 10268 namePop(ctxt); 10269 spacePop(ctxt); 10270 if (nsNr != ctxt->nsNr) 10271 nsPop(ctxt, ctxt->nsNr - nsNr); 10272 return; 10273 } 10274 10275 /* 10276 * parse the end of tag: '</' should be here. 10277 */ 10278 if (ctxt->sax2) { 10279 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 10280 namePop(ctxt); 10281 } 10282 #ifdef LIBXML_SAX1_ENABLED 10283 else 10284 xmlParseEndTag1(ctxt, line); 10285 #endif /* LIBXML_SAX1_ENABLED */ 10286 10287 /* 10288 * Capture end position and add node 10289 */ 10290 if ( ret != NULL && ctxt->record_info ) { 10291 node_info.end_pos = ctxt->input->consumed + 10292 (CUR_PTR - ctxt->input->base); 10293 node_info.end_line = ctxt->input->line; 10294 node_info.node = ret; 10295 xmlParserAddNodeInfo(ctxt, &node_info); 10296 } 10297 } 10298 10299 /** 10300 * xmlParseVersionNum: 10301 * @ctxt: an XML parser context 10302 * 10303 * parse the XML version value. 10304 * 10305 * [26] VersionNum ::= '1.' [0-9]+ 10306 * 10307 * In practice allow [0-9].[0-9]+ at that level 10308 * 10309 * Returns the string giving the XML version number, or NULL 10310 */ 10311 xmlChar * 10312 xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 10313 xmlChar *buf = NULL; 10314 int len = 0; 10315 int size = 10; 10316 xmlChar cur; 10317 10318 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10319 if (buf == NULL) { 10320 xmlErrMemory(ctxt, NULL); 10321 return(NULL); 10322 } 10323 cur = CUR; 10324 if (!((cur >= '0') && (cur <= '9'))) { 10325 xmlFree(buf); 10326 return(NULL); 10327 } 10328 buf[len++] = cur; 10329 NEXT; 10330 cur=CUR; 10331 if (cur != '.') { 10332 xmlFree(buf); 10333 return(NULL); 10334 } 10335 buf[len++] = cur; 10336 NEXT; 10337 cur=CUR; 10338 while ((cur >= '0') && (cur <= '9')) { 10339 if (len + 1 >= size) { 10340 xmlChar *tmp; 10341 10342 size *= 2; 10343 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10344 if (tmp == NULL) { 10345 xmlFree(buf); 10346 xmlErrMemory(ctxt, NULL); 10347 
return(NULL); 10348 } 10349 buf = tmp; 10350 } 10351 buf[len++] = cur; 10352 NEXT; 10353 cur=CUR; 10354 } 10355 buf[len] = 0; 10356 return(buf); 10357 } 10358 10359 /** 10360 * xmlParseVersionInfo: 10361 * @ctxt: an XML parser context 10362 * 10363 * parse the XML version. 10364 * 10365 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 10366 * 10367 * [25] Eq ::= S? '=' S? 10368 * 10369 * Returns the version string, e.g. "1.0" 10370 */ 10371 10372 xmlChar * 10373 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 10374 xmlChar *version = NULL; 10375 10376 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 10377 SKIP(7); 10378 SKIP_BLANKS; 10379 if (RAW != '=') { 10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10381 return(NULL); 10382 } 10383 NEXT; 10384 SKIP_BLANKS; 10385 if (RAW == '"') { 10386 NEXT; 10387 version = xmlParseVersionNum(ctxt); 10388 if (RAW != '"') { 10389 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10390 } else 10391 NEXT; 10392 } else if (RAW == '\''){ 10393 NEXT; 10394 version = xmlParseVersionNum(ctxt); 10395 if (RAW != '\'') { 10396 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10397 } else 10398 NEXT; 10399 } else { 10400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10401 } 10402 } 10403 return(version); 10404 } 10405 10406 /** 10407 * xmlParseEncName: 10408 * @ctxt: an XML parser context 10409 * 10410 * parse the XML encoding name 10411 * 10412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 10413 * 10414 * Returns the encoding name value or NULL 10415 */ 10416 xmlChar * 10417 xmlParseEncName(xmlParserCtxtPtr ctxt) { 10418 xmlChar *buf = NULL; 10419 int len = 0; 10420 int size = 10; 10421 xmlChar cur; 10422 10423 cur = CUR; 10424 if (((cur >= 'a') && (cur <= 'z')) || 10425 ((cur >= 'A') && (cur <= 'Z'))) { 10426 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 10427 if (buf == NULL) { 10428 xmlErrMemory(ctxt, NULL); 10429 return(NULL); 10430 } 10431 10432 buf[len++] = cur; 10433 
NEXT; 10434 cur = CUR; 10435 while (((cur >= 'a') && (cur <= 'z')) || 10436 ((cur >= 'A') && (cur <= 'Z')) || 10437 ((cur >= '0') && (cur <= '9')) || 10438 (cur == '.') || (cur == '_') || 10439 (cur == '-')) { 10440 if (len + 1 >= size) { 10441 xmlChar *tmp; 10442 10443 size *= 2; 10444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 10445 if (tmp == NULL) { 10446 xmlErrMemory(ctxt, NULL); 10447 xmlFree(buf); 10448 return(NULL); 10449 } 10450 buf = tmp; 10451 } 10452 buf[len++] = cur; 10453 NEXT; 10454 cur = CUR; 10455 if (cur == 0) { 10456 SHRINK; 10457 GROW; 10458 cur = CUR; 10459 } 10460 } 10461 buf[len] = 0; 10462 } else { 10463 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 10464 } 10465 return(buf); 10466 } 10467 10468 /** 10469 * xmlParseEncodingDecl: 10470 * @ctxt: an XML parser context 10471 * 10472 * parse the XML encoding declaration 10473 * 10474 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 10475 * 10476 * this setups the conversion filters. 
10477 * 10478 * Returns the encoding value or NULL 10479 */ 10480 10481 const xmlChar * 10482 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 10483 xmlChar *encoding = NULL; 10484 10485 SKIP_BLANKS; 10486 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 10487 SKIP(8); 10488 SKIP_BLANKS; 10489 if (RAW != '=') { 10490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10491 return(NULL); 10492 } 10493 NEXT; 10494 SKIP_BLANKS; 10495 if (RAW == '"') { 10496 NEXT; 10497 encoding = xmlParseEncName(ctxt); 10498 if (RAW != '"') { 10499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10500 xmlFree((xmlChar *) encoding); 10501 return(NULL); 10502 } else 10503 NEXT; 10504 } else if (RAW == '\''){ 10505 NEXT; 10506 encoding = xmlParseEncName(ctxt); 10507 if (RAW != '\'') { 10508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10509 xmlFree((xmlChar *) encoding); 10510 return(NULL); 10511 } else 10512 NEXT; 10513 } else { 10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10515 } 10516 10517 /* 10518 * Non standard parsing, allowing the user to ignore encoding 10519 */ 10520 if (ctxt->options & XML_PARSE_IGNORE_ENC) { 10521 xmlFree((xmlChar *) encoding); 10522 return(NULL); 10523 } 10524 10525 /* 10526 * UTF-16 encoding stwich has already taken place at this stage, 10527 * more over the little-endian/big-endian selection is already done 10528 */ 10529 if ((encoding != NULL) && 10530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 10531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 10532 /* 10533 * If no encoding was passed to the parser, that we are 10534 * using UTF-16 and no decoder is present i.e. 
the 10535 * document is apparently UTF-8 compatible, then raise an 10536 * encoding mismatch fatal error 10537 */ 10538 if ((ctxt->encoding == NULL) && 10539 (ctxt->input->buf != NULL) && 10540 (ctxt->input->buf->encoder == NULL)) { 10541 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 10542 "Document labelled UTF-16 but has UTF-8 content\n"); 10543 } 10544 if (ctxt->encoding != NULL) 10545 xmlFree((xmlChar *) ctxt->encoding); 10546 ctxt->encoding = encoding; 10547 } 10548 /* 10549 * UTF-8 encoding is handled natively 10550 */ 10551 else if ((encoding != NULL) && 10552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 10553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 10554 if (ctxt->encoding != NULL) 10555 xmlFree((xmlChar *) ctxt->encoding); 10556 ctxt->encoding = encoding; 10557 } 10558 else if (encoding != NULL) { 10559 xmlCharEncodingHandlerPtr handler; 10560 10561 if (ctxt->input->encoding != NULL) 10562 xmlFree((xmlChar *) ctxt->input->encoding); 10563 ctxt->input->encoding = encoding; 10564 10565 handler = xmlFindCharEncodingHandler((const char *) encoding); 10566 if (handler != NULL) { 10567 if (xmlSwitchToEncoding(ctxt, handler) < 0) { 10568 /* failed to convert */ 10569 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 10570 return(NULL); 10571 } 10572 } else { 10573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 10574 "Unsupported encoding %s\n", encoding); 10575 return(NULL); 10576 } 10577 } 10578 } 10579 return(encoding); 10580 } 10581 10582 /** 10583 * xmlParseSDDecl: 10584 * @ctxt: an XML parser context 10585 * 10586 * parse the XML standalone declaration 10587 * 10588 * [32] SDDecl ::= S 'standalone' Eq 10589 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 10590 * 10591 * [ VC: Standalone Document Declaration ] 10592 * TODO The standalone document declaration must have the value "no" 10593 * if any external markup declarations contain declarations of: 10594 * - attributes with default values, if elements to which these 10595 * attributes 
apply appear in the document without specifications 10596 * of values for these attributes, or 10597 * - entities (other than amp, lt, gt, apos, quot), if references 10598 * to those entities appear in the document, or 10599 * - attributes with values subject to normalization, where the 10600 * attribute appears in the document with a value which will change 10601 * as a result of normalization, or 10602 * - element types with element content, if white space occurs directly 10603 * within any instance of those types. 10604 * 10605 * Returns: 10606 * 1 if standalone="yes" 10607 * 0 if standalone="no" 10608 * -2 if standalone attribute is missing or invalid 10609 * (A standalone value of -2 means that the XML declaration was found, 10610 * but no value was specified for the standalone attribute). 10611 */ 10612 10613 int 10614 xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 10615 int standalone = -2; 10616 10617 SKIP_BLANKS; 10618 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 10619 SKIP(10); 10620 SKIP_BLANKS; 10621 if (RAW != '=') { 10622 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 10623 return(standalone); 10624 } 10625 NEXT; 10626 SKIP_BLANKS; 10627 if (RAW == '\''){ 10628 NEXT; 10629 if ((RAW == 'n') && (NXT(1) == 'o')) { 10630 standalone = 0; 10631 SKIP(2); 10632 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10633 (NXT(2) == 's')) { 10634 standalone = 1; 10635 SKIP(3); 10636 } else { 10637 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10638 } 10639 if (RAW != '\'') { 10640 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 10641 } else 10642 NEXT; 10643 } else if (RAW == '"'){ 10644 NEXT; 10645 if ((RAW == 'n') && (NXT(1) == 'o')) { 10646 standalone = 0; 10647 SKIP(2); 10648 } else if ((RAW == 'y') && (NXT(1) == 'e') && 10649 (NXT(2) == 's')) { 10650 standalone = 1; 10651 SKIP(3); 10652 } else { 10653 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 10654 } 10655 if (RAW != '"') { 10656 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, 
NULL); 10657 } else 10658 NEXT; 10659 } else { 10660 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 10661 } 10662 } 10663 return(standalone); 10664 } 10665 10666 /** 10667 * xmlParseXMLDecl: 10668 * @ctxt: an XML parser context 10669 * 10670 * parse an XML declaration header 10671 * 10672 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 10673 */ 10674 10675 void 10676 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 10677 xmlChar *version; 10678 10679 /* 10680 * This value for standalone indicates that the document has an 10681 * XML declaration but it does not have a standalone attribute. 10682 * It will be overwritten later if a standalone attribute is found. 10683 */ 10684 ctxt->input->standalone = -2; 10685 10686 /* 10687 * We know that '<?xml' is here. 10688 */ 10689 SKIP(5); 10690 10691 if (!IS_BLANK_CH(RAW)) { 10692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 10693 "Blank needed after '<?xml'\n"); 10694 } 10695 SKIP_BLANKS; 10696 10697 /* 10698 * We must have the VersionInfo here. 
10699 */ 10700 version = xmlParseVersionInfo(ctxt); 10701 if (version == NULL) { 10702 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 10703 } else { 10704 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 10705 /* 10706 * Changed here for XML-1.0 5th edition 10707 */ 10708 if (ctxt->options & XML_PARSE_OLD10) { 10709 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10710 "Unsupported version '%s'\n", 10711 version); 10712 } else { 10713 if ((version[0] == '1') && ((version[1] == '.'))) { 10714 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 10715 "Unsupported version '%s'\n", 10716 version, NULL); 10717 } else { 10718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 10719 "Unsupported version '%s'\n", 10720 version); 10721 } 10722 } 10723 } 10724 if (ctxt->version != NULL) 10725 xmlFree((void *) ctxt->version); 10726 ctxt->version = version; 10727 } 10728 10729 /* 10730 * We may have the encoding declaration 10731 */ 10732 if (!IS_BLANK_CH(RAW)) { 10733 if ((RAW == '?') && (NXT(1) == '>')) { 10734 SKIP(2); 10735 return; 10736 } 10737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10738 } 10739 xmlParseEncodingDecl(ctxt); 10740 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10741 (ctxt->instate == XML_PARSER_EOF)) { 10742 /* 10743 * The XML REC instructs us to stop parsing right here 10744 */ 10745 return; 10746 } 10747 10748 /* 10749 * We may have the standalone status. 
10750 */ 10751 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10752 if ((RAW == '?') && (NXT(1) == '>')) { 10753 SKIP(2); 10754 return; 10755 } 10756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10757 } 10758 10759 /* 10760 * We can grow the input buffer freely at that point 10761 */ 10762 GROW; 10763 10764 SKIP_BLANKS; 10765 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10766 10767 SKIP_BLANKS; 10768 if ((RAW == '?') && (NXT(1) == '>')) { 10769 SKIP(2); 10770 } else if (RAW == '>') { 10771 /* Deprecated old WD ... */ 10772 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10773 NEXT; 10774 } else { 10775 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10776 MOVETO_ENDTAG(CUR_PTR); 10777 NEXT; 10778 } 10779 } 10780 10781 /** 10782 * xmlParseMisc: 10783 * @ctxt: an XML parser context 10784 * 10785 * parse an XML Misc* optional field. 10786 * 10787 * [27] Misc ::= Comment | PI | S 10788 */ 10789 10790 void 10791 xmlParseMisc(xmlParserCtxtPtr ctxt) { 10792 while ((ctxt->instate != XML_PARSER_EOF) && 10793 (((RAW == '<') && (NXT(1) == '?')) || 10794 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10795 IS_BLANK_CH(CUR))) { 10796 if ((RAW == '<') && (NXT(1) == '?')) { 10797 xmlParsePI(ctxt); 10798 } else if (IS_BLANK_CH(CUR)) { 10799 NEXT; 10800 } else 10801 xmlParseComment(ctxt); 10802 } 10803 } 10804 10805 /** 10806 * xmlParseDocument: 10807 * @ctxt: an XML parser context 10808 * 10809 * parse an XML document (and build a tree if using the standard SAX 10810 * interface). 10811 * 10812 * [1] document ::= prolog element Misc* 10813 * 10814 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10815 * 10816 * Returns 0, -1 in case of error. the parser context is augmented 10817 * as a result of the parsing. 
10818 */ 10819 10820 int 10821 xmlParseDocument(xmlParserCtxtPtr ctxt) { 10822 xmlChar start[4]; 10823 xmlCharEncoding enc; 10824 10825 xmlInitParser(); 10826 10827 if ((ctxt == NULL) || (ctxt->input == NULL)) 10828 return(-1); 10829 10830 GROW; 10831 10832 /* 10833 * SAX: detecting the level. 10834 */ 10835 xmlDetectSAX2(ctxt); 10836 10837 /* 10838 * SAX: beginning of the document processing. 10839 */ 10840 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10841 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 10842 if (ctxt->instate == XML_PARSER_EOF) 10843 return(-1); 10844 10845 if ((ctxt->encoding == NULL) && 10846 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10847 /* 10848 * Get the 4 first bytes and decode the charset 10849 * if enc != XML_CHAR_ENCODING_NONE 10850 * plug some encoding conversion routines. 10851 */ 10852 start[0] = RAW; 10853 start[1] = NXT(1); 10854 start[2] = NXT(2); 10855 start[3] = NXT(3); 10856 enc = xmlDetectCharEncoding(&start[0], 4); 10857 if (enc != XML_CHAR_ENCODING_NONE) { 10858 xmlSwitchEncoding(ctxt, enc); 10859 } 10860 } 10861 10862 10863 if (CUR == 0) { 10864 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10865 return(-1); 10866 } 10867 10868 /* 10869 * Check for the XMLDecl in the Prolog. 10870 * do not GROW here to avoid the detected encoder to decode more 10871 * than just the first line, unless the amount of data is really 10872 * too small to hold "<?xml version="1.0" encoding="foo" 10873 */ 10874 if ((ctxt->input->end - ctxt->input->cur) < 35) { 10875 GROW; 10876 } 10877 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 10878 10879 /* 10880 * Note that we will switch encoding on the fly. 
10881 */ 10882 xmlParseXMLDecl(ctxt); 10883 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || 10884 (ctxt->instate == XML_PARSER_EOF)) { 10885 /* 10886 * The XML REC instructs us to stop parsing right here 10887 */ 10888 return(-1); 10889 } 10890 ctxt->standalone = ctxt->input->standalone; 10891 SKIP_BLANKS; 10892 } else { 10893 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10894 } 10895 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 10896 ctxt->sax->startDocument(ctxt->userData); 10897 if (ctxt->instate == XML_PARSER_EOF) 10898 return(-1); 10899 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) && 10900 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) { 10901 ctxt->myDoc->compression = ctxt->input->buf->compressed; 10902 } 10903 10904 /* 10905 * The Misc part of the Prolog 10906 */ 10907 GROW; 10908 xmlParseMisc(ctxt); 10909 10910 /* 10911 * Then possibly doc type declaration(s) and more Misc 10912 * (doctypedecl Misc*)? 10913 */ 10914 GROW; 10915 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 10916 10917 ctxt->inSubset = 1; 10918 xmlParseDocTypeDecl(ctxt); 10919 if (RAW == '[') { 10920 ctxt->instate = XML_PARSER_DTD; 10921 xmlParseInternalSubset(ctxt); 10922 if (ctxt->instate == XML_PARSER_EOF) 10923 return(-1); 10924 } 10925 10926 /* 10927 * Create and update the external subset. 
10928 */ 10929 ctxt->inSubset = 2; 10930 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 10931 (!ctxt->disableSAX)) 10932 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10933 ctxt->extSubSystem, ctxt->extSubURI); 10934 if (ctxt->instate == XML_PARSER_EOF) 10935 return(-1); 10936 ctxt->inSubset = 0; 10937 10938 xmlCleanSpecialAttr(ctxt); 10939 10940 ctxt->instate = XML_PARSER_PROLOG; 10941 xmlParseMisc(ctxt); 10942 } 10943 10944 /* 10945 * Time to start parsing the tree itself 10946 */ 10947 GROW; 10948 if (RAW != '<') { 10949 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 10950 "Start tag expected, '<' not found\n"); 10951 } else { 10952 ctxt->instate = XML_PARSER_CONTENT; 10953 xmlParseElement(ctxt); 10954 ctxt->instate = XML_PARSER_EPILOG; 10955 10956 10957 /* 10958 * The Misc part at the end 10959 */ 10960 xmlParseMisc(ctxt); 10961 10962 if (RAW != 0) { 10963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10964 } 10965 ctxt->instate = XML_PARSER_EOF; 10966 } 10967 10968 /* 10969 * SAX: end of the document processing. 10970 */ 10971 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10972 ctxt->sax->endDocument(ctxt->userData); 10973 10974 /* 10975 * Remove locally kept entity definitions if the tree was not built 10976 */ 10977 if ((ctxt->myDoc != NULL) && 10978 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 10979 xmlFreeDoc(ctxt->myDoc); 10980 ctxt->myDoc = NULL; 10981 } 10982 10983 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) { 10984 ctxt->myDoc->properties |= XML_DOC_WELLFORMED; 10985 if (ctxt->valid) 10986 ctxt->myDoc->properties |= XML_DOC_DTDVALID; 10987 if (ctxt->nsWellFormed) 10988 ctxt->myDoc->properties |= XML_DOC_NSVALID; 10989 if (ctxt->options & XML_PARSE_OLD10) 10990 ctxt->myDoc->properties |= XML_DOC_OLD10; 10991 } 10992 if (! 
ctxt->wellFormed) { 10993 ctxt->valid = 0; 10994 return(-1); 10995 } 10996 return(0); 10997 } 10998 10999 /** 11000 * xmlParseExtParsedEnt: 11001 * @ctxt: an XML parser context 11002 * 11003 * parse a general parsed entity 11004 * An external general parsed entity is well-formed if it matches the 11005 * production labeled extParsedEnt. 11006 * 11007 * [78] extParsedEnt ::= TextDecl? content 11008 * 11009 * Returns 0, -1 in case of error. the parser context is augmented 11010 * as a result of the parsing. 11011 */ 11012 11013 int 11014 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 11015 xmlChar start[4]; 11016 xmlCharEncoding enc; 11017 11018 if ((ctxt == NULL) || (ctxt->input == NULL)) 11019 return(-1); 11020 11021 xmlDefaultSAXHandlerInit(); 11022 11023 xmlDetectSAX2(ctxt); 11024 11025 GROW; 11026 11027 /* 11028 * SAX: beginning of the document processing. 11029 */ 11030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11031 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 11032 11033 /* 11034 * Get the 4 first bytes and decode the charset 11035 * if enc != XML_CHAR_ENCODING_NONE 11036 * plug some encoding conversion routines. 11037 */ 11038 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11039 start[0] = RAW; 11040 start[1] = NXT(1); 11041 start[2] = NXT(2); 11042 start[3] = NXT(3); 11043 enc = xmlDetectCharEncoding(start, 4); 11044 if (enc != XML_CHAR_ENCODING_NONE) { 11045 xmlSwitchEncoding(ctxt, enc); 11046 } 11047 } 11048 11049 11050 if (CUR == 0) { 11051 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11052 } 11053 11054 /* 11055 * Check for the XMLDecl in the Prolog. 11056 */ 11057 GROW; 11058 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11059 11060 /* 11061 * Note that we will switch encoding on the fly. 
11062 */ 11063 xmlParseXMLDecl(ctxt); 11064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11065 /* 11066 * The XML REC instructs us to stop parsing right here 11067 */ 11068 return(-1); 11069 } 11070 SKIP_BLANKS; 11071 } else { 11072 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11073 } 11074 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 11075 ctxt->sax->startDocument(ctxt->userData); 11076 if (ctxt->instate == XML_PARSER_EOF) 11077 return(-1); 11078 11079 /* 11080 * Doing validity checking on chunk doesn't make sense 11081 */ 11082 ctxt->instate = XML_PARSER_CONTENT; 11083 ctxt->validate = 0; 11084 ctxt->loadsubset = 0; 11085 ctxt->depth = 0; 11086 11087 xmlParseContent(ctxt); 11088 if (ctxt->instate == XML_PARSER_EOF) 11089 return(-1); 11090 11091 if ((RAW == '<') && (NXT(1) == '/')) { 11092 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11093 } else if (RAW != 0) { 11094 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11095 } 11096 11097 /* 11098 * SAX: end of the document processing. 11099 */ 11100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11101 ctxt->sax->endDocument(ctxt->userData); 11102 11103 if (! ctxt->wellFormed) return(-1); 11104 return(0); 11105 } 11106 11107 #ifdef LIBXML_PUSH_ENABLED 11108 /************************************************************************ 11109 * * 11110 * Progressive parsing interfaces * 11111 * * 11112 ************************************************************************/ 11113 11114 /** 11115 * xmlParseLookupSequence: 11116 * @ctxt: an XML parser context 11117 * @first: the first char to lookup 11118 * @next: the next char to lookup or zero 11119 * @third: the next char to lookup or zero 11120 * 11121 * Try to find if a sequence (first, next, third) or just (first next) or 11122 * (first) is available in the input stream. 
11123 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 11124 * to avoid rescanning sequences of bytes, it DOES change the state of the 11125 * parser, do not use liberally. 11126 * 11127 * Returns the index to the current parsing point if the full sequence 11128 * is available, -1 otherwise. 11129 */ 11130 static int 11131 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 11132 xmlChar next, xmlChar third) { 11133 int base, len; 11134 xmlParserInputPtr in; 11135 const xmlChar *buf; 11136 11137 in = ctxt->input; 11138 if (in == NULL) return(-1); 11139 base = in->cur - in->base; 11140 if (base < 0) return(-1); 11141 if (ctxt->checkIndex > base) 11142 base = ctxt->checkIndex; 11143 if (in->buf == NULL) { 11144 buf = in->base; 11145 len = in->length; 11146 } else { 11147 buf = xmlBufContent(in->buf->buffer); 11148 len = xmlBufUse(in->buf->buffer); 11149 } 11150 /* take into account the sequence length */ 11151 if (third) len -= 2; 11152 else if (next) len --; 11153 for (;base < len;base++) { 11154 if (buf[base] == first) { 11155 if (third != 0) { 11156 if ((buf[base + 1] != next) || 11157 (buf[base + 2] != third)) continue; 11158 } else if (next != 0) { 11159 if (buf[base + 1] != next) continue; 11160 } 11161 ctxt->checkIndex = 0; 11162 #ifdef DEBUG_PUSH 11163 if (next == 0) 11164 xmlGenericError(xmlGenericErrorContext, 11165 "PP: lookup '%c' found at %d\n", 11166 first, base); 11167 else if (third == 0) 11168 xmlGenericError(xmlGenericErrorContext, 11169 "PP: lookup '%c%c' found at %d\n", 11170 first, next, base); 11171 else 11172 xmlGenericError(xmlGenericErrorContext, 11173 "PP: lookup '%c%c%c' found at %d\n", 11174 first, next, third, base); 11175 #endif 11176 return(base - (in->cur - in->base)); 11177 } 11178 } 11179 ctxt->checkIndex = base; 11180 #ifdef DEBUG_PUSH 11181 if (next == 0) 11182 xmlGenericError(xmlGenericErrorContext, 11183 "PP: lookup '%c' failed\n", first); 11184 else if (third == 0) 11185 
xmlGenericError(xmlGenericErrorContext, 11186 "PP: lookup '%c%c' failed\n", first, next); 11187 else 11188 xmlGenericError(xmlGenericErrorContext, 11189 "PP: lookup '%c%c%c' failed\n", first, next, third); 11190 #endif 11191 return(-1); 11192 } 11193 11194 /** 11195 * xmlParseGetLasts: 11196 * @ctxt: an XML parser context 11197 * @lastlt: pointer to store the last '<' from the input 11198 * @lastgt: pointer to store the last '>' from the input 11199 * 11200 * Lookup the last < and > in the current chunk 11201 */ 11202 static void 11203 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 11204 const xmlChar **lastgt) { 11205 const xmlChar *tmp; 11206 11207 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 11208 xmlGenericError(xmlGenericErrorContext, 11209 "Internal error: xmlParseGetLasts\n"); 11210 return; 11211 } 11212 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 11213 tmp = ctxt->input->end; 11214 tmp--; 11215 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 11216 if (tmp < ctxt->input->base) { 11217 *lastlt = NULL; 11218 *lastgt = NULL; 11219 } else { 11220 *lastlt = tmp; 11221 tmp++; 11222 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 11223 if (*tmp == '\'') { 11224 tmp++; 11225 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 11226 if (tmp < ctxt->input->end) tmp++; 11227 } else if (*tmp == '"') { 11228 tmp++; 11229 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 11230 if (tmp < ctxt->input->end) tmp++; 11231 } else 11232 tmp++; 11233 } 11234 if (tmp < ctxt->input->end) 11235 *lastgt = tmp; 11236 else { 11237 tmp = *lastlt; 11238 tmp--; 11239 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 11240 if (tmp >= ctxt->input->base) 11241 *lastgt = tmp; 11242 else 11243 *lastgt = NULL; 11244 } 11245 } 11246 } else { 11247 *lastlt = NULL; 11248 *lastgt = NULL; 11249 } 11250 } 11251 /** 11252 * xmlCheckCdataPush: 11253 * @cur: pointer to the block of characters 11254 * @len: length of the 
block in bytes 11255 * @complete: 1 if complete CDATA block is passed in, 0 if partial block 11256 * 11257 * Check that the block of characters is okay as SCdata content [20] 11258 * 11259 * Returns the number of bytes to pass if okay, a negative index where an 11260 * UTF-8 error occured otherwise 11261 */ 11262 static int 11263 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) { 11264 int ix; 11265 unsigned char c; 11266 int codepoint; 11267 11268 if ((utf == NULL) || (len <= 0)) 11269 return(0); 11270 11271 for (ix = 0; ix < len;) { /* string is 0-terminated */ 11272 c = utf[ix]; 11273 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 11274 if (c >= 0x20) 11275 ix++; 11276 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 11277 ix++; 11278 else 11279 return(-ix); 11280 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 11281 if (ix + 2 > len) return(complete ? -ix : ix); 11282 if ((utf[ix+1] & 0xc0 ) != 0x80) 11283 return(-ix); 11284 codepoint = (utf[ix] & 0x1f) << 6; 11285 codepoint |= utf[ix+1] & 0x3f; 11286 if (!xmlIsCharQ(codepoint)) 11287 return(-ix); 11288 ix += 2; 11289 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 11290 if (ix + 3 > len) return(complete ? -ix : ix); 11291 if (((utf[ix+1] & 0xc0) != 0x80) || 11292 ((utf[ix+2] & 0xc0) != 0x80)) 11293 return(-ix); 11294 codepoint = (utf[ix] & 0xf) << 12; 11295 codepoint |= (utf[ix+1] & 0x3f) << 6; 11296 codepoint |= utf[ix+2] & 0x3f; 11297 if (!xmlIsCharQ(codepoint)) 11298 return(-ix); 11299 ix += 3; 11300 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 11301 if (ix + 4 > len) return(complete ? 
-ix : ix); 11302 if (((utf[ix+1] & 0xc0) != 0x80) || 11303 ((utf[ix+2] & 0xc0) != 0x80) || 11304 ((utf[ix+3] & 0xc0) != 0x80)) 11305 return(-ix); 11306 codepoint = (utf[ix] & 0x7) << 18; 11307 codepoint |= (utf[ix+1] & 0x3f) << 12; 11308 codepoint |= (utf[ix+2] & 0x3f) << 6; 11309 codepoint |= utf[ix+3] & 0x3f; 11310 if (!xmlIsCharQ(codepoint)) 11311 return(-ix); 11312 ix += 4; 11313 } else /* unknown encoding */ 11314 return(-ix); 11315 } 11316 return(ix); 11317 } 11318 11319 /** 11320 * xmlParseTryOrFinish: 11321 * @ctxt: an XML parser context 11322 * @terminate: last chunk indicator 11323 * 11324 * Try to progress on parsing 11325 * 11326 * Returns zero if no parsing was possible 11327 */ 11328 static int 11329 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 11330 int ret = 0; 11331 int avail, tlen; 11332 xmlChar cur, next; 11333 const xmlChar *lastlt, *lastgt; 11334 11335 if (ctxt->input == NULL) 11336 return(0); 11337 11338 #ifdef DEBUG_PUSH 11339 switch (ctxt->instate) { 11340 case XML_PARSER_EOF: 11341 xmlGenericError(xmlGenericErrorContext, 11342 "PP: try EOF\n"); break; 11343 case XML_PARSER_START: 11344 xmlGenericError(xmlGenericErrorContext, 11345 "PP: try START\n"); break; 11346 case XML_PARSER_MISC: 11347 xmlGenericError(xmlGenericErrorContext, 11348 "PP: try MISC\n");break; 11349 case XML_PARSER_COMMENT: 11350 xmlGenericError(xmlGenericErrorContext, 11351 "PP: try COMMENT\n");break; 11352 case XML_PARSER_PROLOG: 11353 xmlGenericError(xmlGenericErrorContext, 11354 "PP: try PROLOG\n");break; 11355 case XML_PARSER_START_TAG: 11356 xmlGenericError(xmlGenericErrorContext, 11357 "PP: try START_TAG\n");break; 11358 case XML_PARSER_CONTENT: 11359 xmlGenericError(xmlGenericErrorContext, 11360 "PP: try CONTENT\n");break; 11361 case XML_PARSER_CDATA_SECTION: 11362 xmlGenericError(xmlGenericErrorContext, 11363 "PP: try CDATA_SECTION\n");break; 11364 case XML_PARSER_END_TAG: 11365 xmlGenericError(xmlGenericErrorContext, 11366 "PP: try 
END_TAG\n");break; 11367 case XML_PARSER_ENTITY_DECL: 11368 xmlGenericError(xmlGenericErrorContext, 11369 "PP: try ENTITY_DECL\n");break; 11370 case XML_PARSER_ENTITY_VALUE: 11371 xmlGenericError(xmlGenericErrorContext, 11372 "PP: try ENTITY_VALUE\n");break; 11373 case XML_PARSER_ATTRIBUTE_VALUE: 11374 xmlGenericError(xmlGenericErrorContext, 11375 "PP: try ATTRIBUTE_VALUE\n");break; 11376 case XML_PARSER_DTD: 11377 xmlGenericError(xmlGenericErrorContext, 11378 "PP: try DTD\n");break; 11379 case XML_PARSER_EPILOG: 11380 xmlGenericError(xmlGenericErrorContext, 11381 "PP: try EPILOG\n");break; 11382 case XML_PARSER_PI: 11383 xmlGenericError(xmlGenericErrorContext, 11384 "PP: try PI\n");break; 11385 case XML_PARSER_IGNORE: 11386 xmlGenericError(xmlGenericErrorContext, 11387 "PP: try IGNORE\n");break; 11388 } 11389 #endif 11390 11391 if ((ctxt->input != NULL) && 11392 (ctxt->input->cur - ctxt->input->base > 4096)) { 11393 xmlSHRINK(ctxt); 11394 ctxt->checkIndex = 0; 11395 } 11396 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11397 11398 while (ctxt->instate != XML_PARSER_EOF) { 11399 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11400 return(0); 11401 11402 11403 /* 11404 * Pop-up of finished entities. 11405 */ 11406 while ((RAW == 0) && (ctxt->inputNr > 1)) 11407 xmlPopInput(ctxt); 11408 11409 if (ctxt->input == NULL) break; 11410 if (ctxt->input->buf == NULL) 11411 avail = ctxt->input->length - 11412 (ctxt->input->cur - ctxt->input->base); 11413 else { 11414 /* 11415 * If we are operating on converted input, try to flush 11416 * remainng chars to avoid them stalling in the non-converted 11417 * buffer. But do not do this in document start where 11418 * encoding="..." may not have been read and we work on a 11419 * guessed encoding. 
11420 */ 11421 if ((ctxt->instate != XML_PARSER_START) && 11422 (ctxt->input->buf->raw != NULL) && 11423 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { 11424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, 11425 ctxt->input); 11426 size_t current = ctxt->input->cur - ctxt->input->base; 11427 11428 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 11429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, 11430 base, current); 11431 } 11432 avail = xmlBufUse(ctxt->input->buf->buffer) - 11433 (ctxt->input->cur - ctxt->input->base); 11434 } 11435 if (avail < 1) 11436 goto done; 11437 switch (ctxt->instate) { 11438 case XML_PARSER_EOF: 11439 /* 11440 * Document parsing is done ! 11441 */ 11442 goto done; 11443 case XML_PARSER_START: 11444 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 11445 xmlChar start[4]; 11446 xmlCharEncoding enc; 11447 11448 /* 11449 * Very first chars read from the document flow. 11450 */ 11451 if (avail < 4) 11452 goto done; 11453 11454 /* 11455 * Get the 4 first bytes and decode the charset 11456 * if enc != XML_CHAR_ENCODING_NONE 11457 * plug some encoding conversion routines, 11458 * else xmlSwitchEncoding will set to (default) 11459 * UTF8. 
11460 */ 11461 start[0] = RAW; 11462 start[1] = NXT(1); 11463 start[2] = NXT(2); 11464 start[3] = NXT(3); 11465 enc = xmlDetectCharEncoding(start, 4); 11466 xmlSwitchEncoding(ctxt, enc); 11467 break; 11468 } 11469 11470 if (avail < 2) 11471 goto done; 11472 cur = ctxt->input->cur[0]; 11473 next = ctxt->input->cur[1]; 11474 if (cur == 0) { 11475 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11476 ctxt->sax->setDocumentLocator(ctxt->userData, 11477 &xmlDefaultSAXLocator); 11478 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11479 xmlHaltParser(ctxt); 11480 #ifdef DEBUG_PUSH 11481 xmlGenericError(xmlGenericErrorContext, 11482 "PP: entering EOF\n"); 11483 #endif 11484 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11485 ctxt->sax->endDocument(ctxt->userData); 11486 goto done; 11487 } 11488 if ((cur == '<') && (next == '?')) { 11489 /* PI or XML decl */ 11490 if (avail < 5) return(ret); 11491 if ((!terminate) && 11492 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11493 return(ret); 11494 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11495 ctxt->sax->setDocumentLocator(ctxt->userData, 11496 &xmlDefaultSAXLocator); 11497 if ((ctxt->input->cur[2] == 'x') && 11498 (ctxt->input->cur[3] == 'm') && 11499 (ctxt->input->cur[4] == 'l') && 11500 (IS_BLANK_CH(ctxt->input->cur[5]))) { 11501 ret += 5; 11502 #ifdef DEBUG_PUSH 11503 xmlGenericError(xmlGenericErrorContext, 11504 "PP: Parsing XML Decl\n"); 11505 #endif 11506 xmlParseXMLDecl(ctxt); 11507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 11508 /* 11509 * The XML REC instructs us to stop parsing right 11510 * here 11511 */ 11512 xmlHaltParser(ctxt); 11513 return(0); 11514 } 11515 ctxt->standalone = ctxt->input->standalone; 11516 if ((ctxt->encoding == NULL) && 11517 (ctxt->input->encoding != NULL)) 11518 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 11519 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11520 (!ctxt->disableSAX)) 11521 ctxt->sax->startDocument(ctxt->userData); 11522 
ctxt->instate = XML_PARSER_MISC; 11523 #ifdef DEBUG_PUSH 11524 xmlGenericError(xmlGenericErrorContext, 11525 "PP: entering MISC\n"); 11526 #endif 11527 } else { 11528 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11529 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11530 (!ctxt->disableSAX)) 11531 ctxt->sax->startDocument(ctxt->userData); 11532 ctxt->instate = XML_PARSER_MISC; 11533 #ifdef DEBUG_PUSH 11534 xmlGenericError(xmlGenericErrorContext, 11535 "PP: entering MISC\n"); 11536 #endif 11537 } 11538 } else { 11539 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 11540 ctxt->sax->setDocumentLocator(ctxt->userData, 11541 &xmlDefaultSAXLocator); 11542 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 11543 if (ctxt->version == NULL) { 11544 xmlErrMemory(ctxt, NULL); 11545 break; 11546 } 11547 if ((ctxt->sax) && (ctxt->sax->startDocument) && 11548 (!ctxt->disableSAX)) 11549 ctxt->sax->startDocument(ctxt->userData); 11550 ctxt->instate = XML_PARSER_MISC; 11551 #ifdef DEBUG_PUSH 11552 xmlGenericError(xmlGenericErrorContext, 11553 "PP: entering MISC\n"); 11554 #endif 11555 } 11556 break; 11557 case XML_PARSER_START_TAG: { 11558 const xmlChar *name; 11559 const xmlChar *prefix = NULL; 11560 const xmlChar *URI = NULL; 11561 int nsNr = ctxt->nsNr; 11562 11563 if ((avail < 2) && (ctxt->inputNr == 1)) 11564 goto done; 11565 cur = ctxt->input->cur[0]; 11566 if (cur != '<') { 11567 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 11568 xmlHaltParser(ctxt); 11569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11570 ctxt->sax->endDocument(ctxt->userData); 11571 goto done; 11572 } 11573 if (!terminate) { 11574 if (ctxt->progressive) { 11575 /* > can be found unescaped in attribute values */ 11576 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11577 goto done; 11578 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11579 goto done; 11580 } 11581 } 11582 if (ctxt->spaceNr == 0) 11583 spacePush(ctxt, -1); 11584 else if (*ctxt->space == -2) 11585 
spacePush(ctxt, -1); 11586 else 11587 spacePush(ctxt, *ctxt->space); 11588 #ifdef LIBXML_SAX1_ENABLED 11589 if (ctxt->sax2) 11590 #endif /* LIBXML_SAX1_ENABLED */ 11591 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 11592 #ifdef LIBXML_SAX1_ENABLED 11593 else 11594 name = xmlParseStartTag(ctxt); 11595 #endif /* LIBXML_SAX1_ENABLED */ 11596 if (ctxt->instate == XML_PARSER_EOF) 11597 goto done; 11598 if (name == NULL) { 11599 spacePop(ctxt); 11600 xmlHaltParser(ctxt); 11601 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11602 ctxt->sax->endDocument(ctxt->userData); 11603 goto done; 11604 } 11605 #ifdef LIBXML_VALID_ENABLED 11606 /* 11607 * [ VC: Root Element Type ] 11608 * The Name in the document type declaration must match 11609 * the element type of the root element. 11610 */ 11611 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 11612 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 11613 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 11614 #endif /* LIBXML_VALID_ENABLED */ 11615 11616 /* 11617 * Check for an Empty Element. 
11618 */ 11619 if ((RAW == '/') && (NXT(1) == '>')) { 11620 SKIP(2); 11621 11622 if (ctxt->sax2) { 11623 if ((ctxt->sax != NULL) && 11624 (ctxt->sax->endElementNs != NULL) && 11625 (!ctxt->disableSAX)) 11626 ctxt->sax->endElementNs(ctxt->userData, name, 11627 prefix, URI); 11628 if (ctxt->nsNr - nsNr > 0) 11629 nsPop(ctxt, ctxt->nsNr - nsNr); 11630 #ifdef LIBXML_SAX1_ENABLED 11631 } else { 11632 if ((ctxt->sax != NULL) && 11633 (ctxt->sax->endElement != NULL) && 11634 (!ctxt->disableSAX)) 11635 ctxt->sax->endElement(ctxt->userData, name); 11636 #endif /* LIBXML_SAX1_ENABLED */ 11637 } 11638 if (ctxt->instate == XML_PARSER_EOF) 11639 goto done; 11640 spacePop(ctxt); 11641 if (ctxt->nameNr == 0) { 11642 ctxt->instate = XML_PARSER_EPILOG; 11643 } else { 11644 ctxt->instate = XML_PARSER_CONTENT; 11645 } 11646 ctxt->progressive = 1; 11647 break; 11648 } 11649 if (RAW == '>') { 11650 NEXT; 11651 } else { 11652 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 11653 "Couldn't find end of Start Tag %s\n", 11654 name); 11655 nodePop(ctxt); 11656 spacePop(ctxt); 11657 } 11658 if (ctxt->sax2) 11659 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 11660 #ifdef LIBXML_SAX1_ENABLED 11661 else 11662 namePush(ctxt, name); 11663 #endif /* LIBXML_SAX1_ENABLED */ 11664 11665 ctxt->instate = XML_PARSER_CONTENT; 11666 ctxt->progressive = 1; 11667 break; 11668 } 11669 case XML_PARSER_CONTENT: { 11670 const xmlChar *test; 11671 unsigned int cons; 11672 if ((avail < 2) && (ctxt->inputNr == 1)) 11673 goto done; 11674 cur = ctxt->input->cur[0]; 11675 next = ctxt->input->cur[1]; 11676 11677 test = CUR_PTR; 11678 cons = ctxt->input->consumed; 11679 if ((cur == '<') && (next == '/')) { 11680 ctxt->instate = XML_PARSER_END_TAG; 11681 break; 11682 } else if ((cur == '<') && (next == '?')) { 11683 if ((!terminate) && 11684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11685 ctxt->progressive = XML_PARSER_PI; 11686 goto done; 11687 } 11688 xmlParsePI(ctxt); 11689 ctxt->instate = 
XML_PARSER_CONTENT; 11690 ctxt->progressive = 1; 11691 } else if ((cur == '<') && (next != '!')) { 11692 ctxt->instate = XML_PARSER_START_TAG; 11693 break; 11694 } else if ((cur == '<') && (next == '!') && 11695 (ctxt->input->cur[2] == '-') && 11696 (ctxt->input->cur[3] == '-')) { 11697 int term; 11698 11699 if (avail < 4) 11700 goto done; 11701 ctxt->input->cur += 4; 11702 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 11703 ctxt->input->cur -= 4; 11704 if ((!terminate) && (term < 0)) { 11705 ctxt->progressive = XML_PARSER_COMMENT; 11706 goto done; 11707 } 11708 xmlParseComment(ctxt); 11709 ctxt->instate = XML_PARSER_CONTENT; 11710 ctxt->progressive = 1; 11711 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 11712 (ctxt->input->cur[2] == '[') && 11713 (ctxt->input->cur[3] == 'C') && 11714 (ctxt->input->cur[4] == 'D') && 11715 (ctxt->input->cur[5] == 'A') && 11716 (ctxt->input->cur[6] == 'T') && 11717 (ctxt->input->cur[7] == 'A') && 11718 (ctxt->input->cur[8] == '[')) { 11719 SKIP(9); 11720 ctxt->instate = XML_PARSER_CDATA_SECTION; 11721 break; 11722 } else if ((cur == '<') && (next == '!') && 11723 (avail < 9)) { 11724 goto done; 11725 } else if (cur == '&') { 11726 if ((!terminate) && 11727 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 11728 goto done; 11729 xmlParseReference(ctxt); 11730 } else { 11731 /* TODO Avoid the extra copy, handle directly !!! */ 11732 /* 11733 * Goal of the following test is: 11734 * - minimize calls to the SAX 'character' callback 11735 * when they are mergeable 11736 * - handle an problem for isBlank when we only parse 11737 * a sequence of blank chars and the next one is 11738 * not available to check against '<' presence. 11739 * - tries to homogenize the differences in SAX 11740 * callbacks between the push and pull versions 11741 * of the parser. 
11742 */ 11743 if ((ctxt->inputNr == 1) && 11744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 11745 if (!terminate) { 11746 if (ctxt->progressive) { 11747 if ((lastlt == NULL) || 11748 (ctxt->input->cur > lastlt)) 11749 goto done; 11750 } else if (xmlParseLookupSequence(ctxt, 11751 '<', 0, 0) < 0) { 11752 goto done; 11753 } 11754 } 11755 } 11756 ctxt->checkIndex = 0; 11757 xmlParseCharData(ctxt, 0); 11758 } 11759 /* 11760 * Pop-up of finished entities. 11761 */ 11762 while ((RAW == 0) && (ctxt->inputNr > 1)) 11763 xmlPopInput(ctxt); 11764 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 11765 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 11766 "detected an error in element content\n"); 11767 xmlHaltParser(ctxt); 11768 break; 11769 } 11770 break; 11771 } 11772 case XML_PARSER_END_TAG: 11773 if (avail < 2) 11774 goto done; 11775 if (!terminate) { 11776 if (ctxt->progressive) { 11777 /* > can be found unescaped in attribute values */ 11778 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11779 goto done; 11780 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11781 goto done; 11782 } 11783 } 11784 if (ctxt->sax2) { 11785 xmlParseEndTag2(ctxt, 11786 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11787 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11788 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11789 nameNsPop(ctxt); 11790 } 11791 #ifdef LIBXML_SAX1_ENABLED 11792 else 11793 xmlParseEndTag1(ctxt, 0); 11794 #endif /* LIBXML_SAX1_ENABLED */ 11795 if (ctxt->instate == XML_PARSER_EOF) { 11796 /* Nothing */ 11797 } else if (ctxt->nameNr == 0) { 11798 ctxt->instate = XML_PARSER_EPILOG; 11799 } else { 11800 ctxt->instate = XML_PARSER_CONTENT; 11801 } 11802 break; 11803 case XML_PARSER_CDATA_SECTION: { 11804 /* 11805 * The Push mode need to have the SAX callback for 11806 * cdataBlock merge back contiguous callbacks. 
11807 */ 11808 int base; 11809 11810 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11811 if (base < 0) { 11812 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11813 int tmp; 11814 11815 tmp = xmlCheckCdataPush(ctxt->input->cur, 11816 XML_PARSER_BIG_BUFFER_SIZE, 0); 11817 if (tmp < 0) { 11818 tmp = -tmp; 11819 ctxt->input->cur += tmp; 11820 goto encoding_error; 11821 } 11822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11823 if (ctxt->sax->cdataBlock != NULL) 11824 ctxt->sax->cdataBlock(ctxt->userData, 11825 ctxt->input->cur, tmp); 11826 else if (ctxt->sax->characters != NULL) 11827 ctxt->sax->characters(ctxt->userData, 11828 ctxt->input->cur, tmp); 11829 } 11830 if (ctxt->instate == XML_PARSER_EOF) 11831 goto done; 11832 SKIPL(tmp); 11833 ctxt->checkIndex = 0; 11834 } 11835 goto done; 11836 } else { 11837 int tmp; 11838 11839 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1); 11840 if ((tmp < 0) || (tmp != base)) { 11841 tmp = -tmp; 11842 ctxt->input->cur += tmp; 11843 goto encoding_error; 11844 } 11845 if ((ctxt->sax != NULL) && (base == 0) && 11846 (ctxt->sax->cdataBlock != NULL) && 11847 (!ctxt->disableSAX)) { 11848 /* 11849 * Special case to provide identical behaviour 11850 * between pull and push parsers on enpty CDATA 11851 * sections 11852 */ 11853 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11854 (!strncmp((const char *)&ctxt->input->cur[-9], 11855 "<![CDATA[", 9))) 11856 ctxt->sax->cdataBlock(ctxt->userData, 11857 BAD_CAST "", 0); 11858 } else if ((ctxt->sax != NULL) && (base > 0) && 11859 (!ctxt->disableSAX)) { 11860 if (ctxt->sax->cdataBlock != NULL) 11861 ctxt->sax->cdataBlock(ctxt->userData, 11862 ctxt->input->cur, base); 11863 else if (ctxt->sax->characters != NULL) 11864 ctxt->sax->characters(ctxt->userData, 11865 ctxt->input->cur, base); 11866 } 11867 if (ctxt->instate == XML_PARSER_EOF) 11868 goto done; 11869 SKIPL(base + 3); 11870 ctxt->checkIndex = 0; 11871 ctxt->instate = XML_PARSER_CONTENT; 11872 #ifdef DEBUG_PUSH 11873 
xmlGenericError(xmlGenericErrorContext, 11874 "PP: entering CONTENT\n"); 11875 #endif 11876 } 11877 break; 11878 } 11879 case XML_PARSER_MISC: 11880 SKIP_BLANKS; 11881 if (ctxt->input->buf == NULL) 11882 avail = ctxt->input->length - 11883 (ctxt->input->cur - ctxt->input->base); 11884 else 11885 avail = xmlBufUse(ctxt->input->buf->buffer) - 11886 (ctxt->input->cur - ctxt->input->base); 11887 if (avail < 2) 11888 goto done; 11889 cur = ctxt->input->cur[0]; 11890 next = ctxt->input->cur[1]; 11891 if ((cur == '<') && (next == '?')) { 11892 if ((!terminate) && 11893 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11894 ctxt->progressive = XML_PARSER_PI; 11895 goto done; 11896 } 11897 #ifdef DEBUG_PUSH 11898 xmlGenericError(xmlGenericErrorContext, 11899 "PP: Parsing PI\n"); 11900 #endif 11901 xmlParsePI(ctxt); 11902 if (ctxt->instate == XML_PARSER_EOF) 11903 goto done; 11904 ctxt->instate = XML_PARSER_MISC; 11905 ctxt->progressive = 1; 11906 ctxt->checkIndex = 0; 11907 } else if ((cur == '<') && (next == '!') && 11908 (ctxt->input->cur[2] == '-') && 11909 (ctxt->input->cur[3] == '-')) { 11910 if ((!terminate) && 11911 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 11912 ctxt->progressive = XML_PARSER_COMMENT; 11913 goto done; 11914 } 11915 #ifdef DEBUG_PUSH 11916 xmlGenericError(xmlGenericErrorContext, 11917 "PP: Parsing Comment\n"); 11918 #endif 11919 xmlParseComment(ctxt); 11920 if (ctxt->instate == XML_PARSER_EOF) 11921 goto done; 11922 ctxt->instate = XML_PARSER_MISC; 11923 ctxt->progressive = 1; 11924 ctxt->checkIndex = 0; 11925 } else if ((cur == '<') && (next == '!') && 11926 (ctxt->input->cur[2] == 'D') && 11927 (ctxt->input->cur[3] == 'O') && 11928 (ctxt->input->cur[4] == 'C') && 11929 (ctxt->input->cur[5] == 'T') && 11930 (ctxt->input->cur[6] == 'Y') && 11931 (ctxt->input->cur[7] == 'P') && 11932 (ctxt->input->cur[8] == 'E')) { 11933 if ((!terminate) && 11934 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) { 11935 ctxt->progressive = XML_PARSER_DTD; 
11936 goto done; 11937 } 11938 #ifdef DEBUG_PUSH 11939 xmlGenericError(xmlGenericErrorContext, 11940 "PP: Parsing internal subset\n"); 11941 #endif 11942 ctxt->inSubset = 1; 11943 ctxt->progressive = 0; 11944 ctxt->checkIndex = 0; 11945 xmlParseDocTypeDecl(ctxt); 11946 if (ctxt->instate == XML_PARSER_EOF) 11947 goto done; 11948 if (RAW == '[') { 11949 ctxt->instate = XML_PARSER_DTD; 11950 #ifdef DEBUG_PUSH 11951 xmlGenericError(xmlGenericErrorContext, 11952 "PP: entering DTD\n"); 11953 #endif 11954 } else { 11955 /* 11956 * Create and update the external subset. 11957 */ 11958 ctxt->inSubset = 2; 11959 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11960 (ctxt->sax->externalSubset != NULL)) 11961 ctxt->sax->externalSubset(ctxt->userData, 11962 ctxt->intSubName, ctxt->extSubSystem, 11963 ctxt->extSubURI); 11964 ctxt->inSubset = 0; 11965 xmlCleanSpecialAttr(ctxt); 11966 ctxt->instate = XML_PARSER_PROLOG; 11967 #ifdef DEBUG_PUSH 11968 xmlGenericError(xmlGenericErrorContext, 11969 "PP: entering PROLOG\n"); 11970 #endif 11971 } 11972 } else if ((cur == '<') && (next == '!') && 11973 (avail < 9)) { 11974 goto done; 11975 } else { 11976 ctxt->instate = XML_PARSER_START_TAG; 11977 ctxt->progressive = XML_PARSER_START_TAG; 11978 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11979 #ifdef DEBUG_PUSH 11980 xmlGenericError(xmlGenericErrorContext, 11981 "PP: entering START_TAG\n"); 11982 #endif 11983 } 11984 break; 11985 case XML_PARSER_PROLOG: 11986 SKIP_BLANKS; 11987 if (ctxt->input->buf == NULL) 11988 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11989 else 11990 avail = xmlBufUse(ctxt->input->buf->buffer) - 11991 (ctxt->input->cur - ctxt->input->base); 11992 if (avail < 2) 11993 goto done; 11994 cur = ctxt->input->cur[0]; 11995 next = ctxt->input->cur[1]; 11996 if ((cur == '<') && (next == '?')) { 11997 if ((!terminate) && 11998 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 11999 ctxt->progressive = XML_PARSER_PI; 12000 goto done; 12001 } 12002 
#ifdef DEBUG_PUSH 12003 xmlGenericError(xmlGenericErrorContext, 12004 "PP: Parsing PI\n"); 12005 #endif 12006 xmlParsePI(ctxt); 12007 if (ctxt->instate == XML_PARSER_EOF) 12008 goto done; 12009 ctxt->instate = XML_PARSER_PROLOG; 12010 ctxt->progressive = 1; 12011 } else if ((cur == '<') && (next == '!') && 12012 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12013 if ((!terminate) && 12014 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12015 ctxt->progressive = XML_PARSER_COMMENT; 12016 goto done; 12017 } 12018 #ifdef DEBUG_PUSH 12019 xmlGenericError(xmlGenericErrorContext, 12020 "PP: Parsing Comment\n"); 12021 #endif 12022 xmlParseComment(ctxt); 12023 if (ctxt->instate == XML_PARSER_EOF) 12024 goto done; 12025 ctxt->instate = XML_PARSER_PROLOG; 12026 ctxt->progressive = 1; 12027 } else if ((cur == '<') && (next == '!') && 12028 (avail < 4)) { 12029 goto done; 12030 } else { 12031 ctxt->instate = XML_PARSER_START_TAG; 12032 if (ctxt->progressive == 0) 12033 ctxt->progressive = XML_PARSER_START_TAG; 12034 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 12035 #ifdef DEBUG_PUSH 12036 xmlGenericError(xmlGenericErrorContext, 12037 "PP: entering START_TAG\n"); 12038 #endif 12039 } 12040 break; 12041 case XML_PARSER_EPILOG: 12042 SKIP_BLANKS; 12043 if (ctxt->input->buf == NULL) 12044 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 12045 else 12046 avail = xmlBufUse(ctxt->input->buf->buffer) - 12047 (ctxt->input->cur - ctxt->input->base); 12048 if (avail < 2) 12049 goto done; 12050 cur = ctxt->input->cur[0]; 12051 next = ctxt->input->cur[1]; 12052 if ((cur == '<') && (next == '?')) { 12053 if ((!terminate) && 12054 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) { 12055 ctxt->progressive = XML_PARSER_PI; 12056 goto done; 12057 } 12058 #ifdef DEBUG_PUSH 12059 xmlGenericError(xmlGenericErrorContext, 12060 "PP: Parsing PI\n"); 12061 #endif 12062 xmlParsePI(ctxt); 12063 if (ctxt->instate == XML_PARSER_EOF) 12064 goto done; 12065 
ctxt->instate = XML_PARSER_EPILOG; 12066 ctxt->progressive = 1; 12067 } else if ((cur == '<') && (next == '!') && 12068 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 12069 if ((!terminate) && 12070 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) { 12071 ctxt->progressive = XML_PARSER_COMMENT; 12072 goto done; 12073 } 12074 #ifdef DEBUG_PUSH 12075 xmlGenericError(xmlGenericErrorContext, 12076 "PP: Parsing Comment\n"); 12077 #endif 12078 xmlParseComment(ctxt); 12079 if (ctxt->instate == XML_PARSER_EOF) 12080 goto done; 12081 ctxt->instate = XML_PARSER_EPILOG; 12082 ctxt->progressive = 1; 12083 } else if ((cur == '<') && (next == '!') && 12084 (avail < 4)) { 12085 goto done; 12086 } else { 12087 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 12088 xmlHaltParser(ctxt); 12089 #ifdef DEBUG_PUSH 12090 xmlGenericError(xmlGenericErrorContext, 12091 "PP: entering EOF\n"); 12092 #endif 12093 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 12094 ctxt->sax->endDocument(ctxt->userData); 12095 goto done; 12096 } 12097 break; 12098 case XML_PARSER_DTD: { 12099 /* 12100 * Sorry but progressive parsing of the internal subset 12101 * is not expected to be supported. We first check that 12102 * the full content of the internal subset is available and 12103 * the parsing is launched only at that point. 12104 * Internal subset ends up with "']' S? '>'" in an unescaped 12105 * section and not in a ']]>' sequence which are conditional 12106 * sections (whoever argued to keep that crap in XML deserve 12107 * a place in hell !). 
12108 */ 12109 int base, i; 12110 xmlChar *buf; 12111 xmlChar quote = 0; 12112 size_t use; 12113 12114 base = ctxt->input->cur - ctxt->input->base; 12115 if (base < 0) return(0); 12116 if (ctxt->checkIndex > base) 12117 base = ctxt->checkIndex; 12118 buf = xmlBufContent(ctxt->input->buf->buffer); 12119 use = xmlBufUse(ctxt->input->buf->buffer); 12120 for (;(unsigned int) base < use; base++) { 12121 if (quote != 0) { 12122 if (buf[base] == quote) 12123 quote = 0; 12124 continue; 12125 } 12126 if ((quote == 0) && (buf[base] == '<')) { 12127 int found = 0; 12128 /* special handling of comments */ 12129 if (((unsigned int) base + 4 < use) && 12130 (buf[base + 1] == '!') && 12131 (buf[base + 2] == '-') && 12132 (buf[base + 3] == '-')) { 12133 for (;(unsigned int) base + 3 < use; base++) { 12134 if ((buf[base] == '-') && 12135 (buf[base + 1] == '-') && 12136 (buf[base + 2] == '>')) { 12137 found = 1; 12138 base += 2; 12139 break; 12140 } 12141 } 12142 if (!found) { 12143 #if 0 12144 fprintf(stderr, "unfinished comment\n"); 12145 #endif 12146 break; /* for */ 12147 } 12148 continue; 12149 } 12150 } 12151 if (buf[base] == '"') { 12152 quote = '"'; 12153 continue; 12154 } 12155 if (buf[base] == '\'') { 12156 quote = '\''; 12157 continue; 12158 } 12159 if (buf[base] == ']') { 12160 #if 0 12161 fprintf(stderr, "%c%c%c%c: ", buf[base], 12162 buf[base + 1], buf[base + 2], buf[base + 3]); 12163 #endif 12164 if ((unsigned int) base +1 >= use) 12165 break; 12166 if (buf[base + 1] == ']') { 12167 /* conditional crap, skip both ']' ! 
*/ 12168 base++; 12169 continue; 12170 } 12171 for (i = 1; (unsigned int) base + i < use; i++) { 12172 if (buf[base + i] == '>') { 12173 #if 0 12174 fprintf(stderr, "found\n"); 12175 #endif 12176 goto found_end_int_subset; 12177 } 12178 if (!IS_BLANK_CH(buf[base + i])) { 12179 #if 0 12180 fprintf(stderr, "not found\n"); 12181 #endif 12182 goto not_end_of_int_subset; 12183 } 12184 } 12185 #if 0 12186 fprintf(stderr, "end of stream\n"); 12187 #endif 12188 break; 12189 12190 } 12191 not_end_of_int_subset: 12192 continue; /* for */ 12193 } 12194 /* 12195 * We didn't found the end of the Internal subset 12196 */ 12197 if (quote == 0) 12198 ctxt->checkIndex = base; 12199 else 12200 ctxt->checkIndex = 0; 12201 #ifdef DEBUG_PUSH 12202 if (next == 0) 12203 xmlGenericError(xmlGenericErrorContext, 12204 "PP: lookup of int subset end filed\n"); 12205 #endif 12206 goto done; 12207 12208 found_end_int_subset: 12209 ctxt->checkIndex = 0; 12210 xmlParseInternalSubset(ctxt); 12211 if (ctxt->instate == XML_PARSER_EOF) 12212 goto done; 12213 ctxt->inSubset = 2; 12214 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 12215 (ctxt->sax->externalSubset != NULL)) 12216 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 12217 ctxt->extSubSystem, ctxt->extSubURI); 12218 ctxt->inSubset = 0; 12219 xmlCleanSpecialAttr(ctxt); 12220 if (ctxt->instate == XML_PARSER_EOF) 12221 goto done; 12222 ctxt->instate = XML_PARSER_PROLOG; 12223 ctxt->checkIndex = 0; 12224 #ifdef DEBUG_PUSH 12225 xmlGenericError(xmlGenericErrorContext, 12226 "PP: entering PROLOG\n"); 12227 #endif 12228 break; 12229 } 12230 case XML_PARSER_COMMENT: 12231 xmlGenericError(xmlGenericErrorContext, 12232 "PP: internal error, state == COMMENT\n"); 12233 ctxt->instate = XML_PARSER_CONTENT; 12234 #ifdef DEBUG_PUSH 12235 xmlGenericError(xmlGenericErrorContext, 12236 "PP: entering CONTENT\n"); 12237 #endif 12238 break; 12239 case XML_PARSER_IGNORE: 12240 xmlGenericError(xmlGenericErrorContext, 12241 "PP: internal error, state 
== IGNORE"); 12242 ctxt->instate = XML_PARSER_DTD; 12243 #ifdef DEBUG_PUSH 12244 xmlGenericError(xmlGenericErrorContext, 12245 "PP: entering DTD\n"); 12246 #endif 12247 break; 12248 case XML_PARSER_PI: 12249 xmlGenericError(xmlGenericErrorContext, 12250 "PP: internal error, state == PI\n"); 12251 ctxt->instate = XML_PARSER_CONTENT; 12252 #ifdef DEBUG_PUSH 12253 xmlGenericError(xmlGenericErrorContext, 12254 "PP: entering CONTENT\n"); 12255 #endif 12256 break; 12257 case XML_PARSER_ENTITY_DECL: 12258 xmlGenericError(xmlGenericErrorContext, 12259 "PP: internal error, state == ENTITY_DECL\n"); 12260 ctxt->instate = XML_PARSER_DTD; 12261 #ifdef DEBUG_PUSH 12262 xmlGenericError(xmlGenericErrorContext, 12263 "PP: entering DTD\n"); 12264 #endif 12265 break; 12266 case XML_PARSER_ENTITY_VALUE: 12267 xmlGenericError(xmlGenericErrorContext, 12268 "PP: internal error, state == ENTITY_VALUE\n"); 12269 ctxt->instate = XML_PARSER_CONTENT; 12270 #ifdef DEBUG_PUSH 12271 xmlGenericError(xmlGenericErrorContext, 12272 "PP: entering DTD\n"); 12273 #endif 12274 break; 12275 case XML_PARSER_ATTRIBUTE_VALUE: 12276 xmlGenericError(xmlGenericErrorContext, 12277 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 12278 ctxt->instate = XML_PARSER_START_TAG; 12279 #ifdef DEBUG_PUSH 12280 xmlGenericError(xmlGenericErrorContext, 12281 "PP: entering START_TAG\n"); 12282 #endif 12283 break; 12284 case XML_PARSER_SYSTEM_LITERAL: 12285 xmlGenericError(xmlGenericErrorContext, 12286 "PP: internal error, state == SYSTEM_LITERAL\n"); 12287 ctxt->instate = XML_PARSER_START_TAG; 12288 #ifdef DEBUG_PUSH 12289 xmlGenericError(xmlGenericErrorContext, 12290 "PP: entering START_TAG\n"); 12291 #endif 12292 break; 12293 case XML_PARSER_PUBLIC_LITERAL: 12294 xmlGenericError(xmlGenericErrorContext, 12295 "PP: internal error, state == PUBLIC_LITERAL\n"); 12296 ctxt->instate = XML_PARSER_START_TAG; 12297 #ifdef DEBUG_PUSH 12298 xmlGenericError(xmlGenericErrorContext, 12299 "PP: entering START_TAG\n"); 12300 #endif 
                break;
        }
    }
done:
#ifdef DEBUG_PUSH
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
#endif
    return(ret);
encoding_error:
    {
        char buffer[150];

        snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                 ctxt->input->cur[0], ctxt->input->cur[1],
                 ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                         "Input is not proper UTF-8, indicate encoding !\n%s",
                         BAD_CAST buffer, NULL);
    }
    return(0);
}

/**
 * xmlParseCheckTransition:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 *
 * Decide, from the current parser state, whether the freshly pushed chunk
 * is worth an immediate parsing attempt or whether more data is needed
 * first.  For every state tested below the chunk is considered useful only
 * if it contains a '>' byte; in any other state a parsing attempt is
 * always requested.
 *
 * Returns -1 in case of error (NULL @ctxt, NULL @chunk or negative @size),
 *         0 if the push is not needed and 1 if needed
 */
static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
        return(-1);
    if (ctxt->instate == XML_PARSER_START_TAG) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    /*
     * NOTE(review): the COMMENT, PI and DTD tests read ctxt->progressive
     * rather than ctxt->instate; presumably that field records the state
     * whose terminator is being looked up -- confirm against the code
     * that assigns it.
     */
    if (ctxt->progressive == XML_PARSER_COMMENT) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    if (ctxt->progressive == XML_PARSER_PI) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    if (ctxt->instate == XML_PARSER_END_TAG) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    if ((ctxt->progressive == XML_PARSER_DTD) ||
        (ctxt->instate == XML_PARSER_DTD)) {
        if (memchr(chunk, '>', size) != NULL)
            return(1);
        return(0);
    }
    /* Any other state: always attempt to parse. */
    return(1);
}

/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a chunk of memory: push @chunk into the context's input buffer
 * and run the progressive parser on whatever is available.
 *
 * Returns zero if no error, the xmlParserErrors otherwise.  More precisely:
 *   - XML_ERR_INTERNAL_ERROR if @ctxt is NULL;
 *   - ctxt->errNo if an error is already recorded and SAX is disabled;
 *   - -1 if the parser was already in the XML_PARSER_EOF state on entry;
 *   - XML_PARSER_EOF (also stored in ctxt->errNo) if pushing the data
 *     into the input buffer fails;
 *   - XML_ERR_INVALID_ENCODING if the pending-input conversion fails;
 *   - ctxt->errNo if the document is not well formed, 0 otherwise.
 */
int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    int end_in_lf = 0;
    int remain = 0;
    size_t old_avail = 0;
    size_t avail = 0;

    if (ctxt == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (ctxt->instate == XML_PARSER_START)
        xmlDetectSAX2(ctxt);
    /*
     * A non-final chunk ending in '\r' has that byte held back here; it is
     * pushed on its own at the end of this call, after parsing.  Despite
     * its name, end_in_lf flags a trailing carriage return.
     * NOTE(review): presumably this keeps a CR/LF pair split across two
     * chunks from being seen by the parser in two pieces -- confirm.
     */
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
        (chunk[size - 1] == '\r')) {
        end_in_lf = 1;
        size--;
    }

    /*
     * Re-entered through the goto below when only the first bytes of the
     * chunk were pushed; the second pass pushes the remainder.
     */
xmldecl_done:

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Remember base and cur as offsets so the input pointers can be
         * re-established by xmlBufSetInputBaseCur() after the push.
         */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;

        old_avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Specific handling if we autodetected an encoding, we should not
         * push more than the first line ... which depend on the encoding
         * And only push the rest once the final encoding was detected
         */
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* Byte budget for the first push: 45, doubled for encoders
             * named UTF-16/UTF16, quadrupled for UCS-4/UCS4. */
            unsigned int len = 45;

            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF-16")) ||
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF16")))
                len = 90;
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS-4")) ||
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS4")))
                len = 180;

            /* Raw bytes already consumed count against the budget. */
            if (ctxt->input->buf->rawconsumed < len)
                len -= ctxt->input->buf->rawconsumed;

            /*
             * Change size for reading the initial declaration only
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
             * will blindly copy extra bytes from memory.
             */
            if ((unsigned int) size > len) {
                remain = size - len;
                size = len;
            } else {
                remain = 0;
            }
        }
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
        if (res < 0) {
            /* Push failed: halt and report XML_PARSER_EOF as the error. */
            ctxt->errNo = XML_PARSER_EOF;
            xmlHaltParser(ctxt);
            return (XML_PARSER_EOF);
        }
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

    } else if (ctxt->instate != XML_PARSER_EOF) {
        /*
         * Nothing new to push: when an encoder and raw data are present,
         * run the input conversion on what is already buffered.
         */
        if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
            xmlParserInputBufferPtr in = ctxt->input->buf;
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                (in->raw != NULL)) {
                int nbchars;
                size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
                size_t current = ctxt->input->cur - ctxt->input->base;

                nbchars = xmlCharEncInput(in, terminate);
                if (nbchars < 0) {
                    /* TODO 2.6.0 */
                    xmlGenericError(xmlGenericErrorContext,
                                    "xmlParseChunk: encoder error\n");
                    return(XML_ERR_INVALID_ENCODING);
                }
                xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
            }
        }
    }
    if (remain != 0) {
        /* Only the head of the chunk was pushed: parse it, never as final. */
        xmlParseTryOrFinish(ctxt, 0);
    } else {
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
            avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Depending on the current state it may not be such
         * a good idea to try parsing if there is nothing in the chunk
         * which would be worth doing a parser state transition and we
         * need to wait for more data
         */
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
            (old_avail == 0) || (avail == 0) ||
            (xmlParseCheckTransition(ctxt,
                       (const char *)&ctxt->input->base[old_avail],
                                     avail - old_avail)))
            xmlParseTryOrFinish(ctxt, terminate);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ctxt->errNo);

    /*
     * Unless XML_PARSE_HUGE is set, refuse to keep more than
     * XML_MAX_LOOKUP_LIMIT bytes either pending or already consumed.
     */
    if ((ctxt->input != NULL) &&
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
    }
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);

    if (remain != 0) {
        /* Second pass: push what was left out of the first push. */
        chunk += size;
        size = remain;
        remain = 0;
        goto xmldecl_done;
    }
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                         ctxt->input);
        size_t current = ctxt->input->cur - ctxt->input->base;

        /* Push the carriage return that was held back on entry. */
        xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
                              base, current);
    }
    if (terminate) {
        /*
         * Check for termination
         */
        int cur_avail = 0;

        /* Bytes still unread in the current input. */
        if (ctxt->input != NULL) {
            if (ctxt->input->buf == NULL)
                cur_avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
            else
                cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
                            (ctxt->input->cur - ctxt->input->base);
        }

        /* The document must have reached the epilog, with nothing left. */
        if ((ctxt->instate != XML_PARSER_EOF) &&
            (ctxt->instate != XML_PARSER_EPILOG)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF) {
            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                ctxt->sax->endDocument(ctxt->userData);
        }
        ctxt->instate = XML_PARSER_EOF;
    }
    if (ctxt->wellFormed == 0)
        return((xmlParserErrors) ctxt->errNo);
    else
        return(0);
}

/************************************************************************
 *                                                                      *
 *              I/O front end functions to the parser                   *
 *                                                                      *
 ************************************************************************/

/**
 * xmlCreatePushParserCtxt:
 * @sax:  a SAX handler
 * @user_data:  The user data returned on SAX callbacks
 * @chunk:  a pointer to an array of chars
 * @size:  number of chars in the array
 * @filename:  an optional file name or URI
 *
 * Create a parser context for using the XML parser in push mode.
 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
 * the encoding.  The remaining characters will be parsed so they
 * don't need to be fed in again through xmlParseChunk.
 * To allow content encoding detection, @size should be >= 4
 * The value of @filename is used for fetching external entities
 * and error/warning reports.
12583 * 12584 * Returns the new parser context or NULL 12585 */ 12586 12587 xmlParserCtxtPtr 12588 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12589 const char *chunk, int size, const char *filename) { 12590 xmlParserCtxtPtr ctxt; 12591 xmlParserInputPtr inputStream; 12592 xmlParserInputBufferPtr buf; 12593 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 12594 12595 /* 12596 * plug some encoding conversion routines 12597 */ 12598 if ((chunk != NULL) && (size >= 4)) 12599 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 12600 12601 buf = xmlAllocParserInputBuffer(enc); 12602 if (buf == NULL) return(NULL); 12603 12604 ctxt = xmlNewParserCtxt(); 12605 if (ctxt == NULL) { 12606 xmlErrMemory(NULL, "creating parser: out of memory\n"); 12607 xmlFreeParserInputBuffer(buf); 12608 return(NULL); 12609 } 12610 ctxt->dictNames = 1; 12611 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 12612 if (ctxt->pushTab == NULL) { 12613 xmlErrMemory(ctxt, NULL); 12614 xmlFreeParserInputBuffer(buf); 12615 xmlFreeParserCtxt(ctxt); 12616 return(NULL); 12617 } 12618 if (sax != NULL) { 12619 #ifdef LIBXML_SAX1_ENABLED 12620 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12621 #endif /* LIBXML_SAX1_ENABLED */ 12622 xmlFree(ctxt->sax); 12623 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12624 if (ctxt->sax == NULL) { 12625 xmlErrMemory(ctxt, NULL); 12626 xmlFreeParserInputBuffer(buf); 12627 xmlFreeParserCtxt(ctxt); 12628 return(NULL); 12629 } 12630 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12631 if (sax->initialized == XML_SAX2_MAGIC) 12632 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12633 else 12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12635 if (user_data != NULL) 12636 ctxt->userData = user_data; 12637 } 12638 if (filename == NULL) { 12639 ctxt->directory = NULL; 12640 } else { 12641 ctxt->directory = xmlParserGetDirectory(filename); 12642 } 12643 12644 inputStream = xmlNewInputStream(ctxt); 
12645 if (inputStream == NULL) { 12646 xmlFreeParserCtxt(ctxt); 12647 xmlFreeParserInputBuffer(buf); 12648 return(NULL); 12649 } 12650 12651 if (filename == NULL) 12652 inputStream->filename = NULL; 12653 else { 12654 inputStream->filename = (char *) 12655 xmlCanonicPath((const xmlChar *) filename); 12656 if (inputStream->filename == NULL) { 12657 xmlFreeParserCtxt(ctxt); 12658 xmlFreeParserInputBuffer(buf); 12659 return(NULL); 12660 } 12661 } 12662 inputStream->buf = buf; 12663 xmlBufResetInput(inputStream->buf->buffer, inputStream); 12664 inputPush(ctxt, inputStream); 12665 12666 /* 12667 * If the caller didn't provide an initial 'chunk' for determining 12668 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 12669 * that it can be automatically determined later 12670 */ 12671 if ((size == 0) || (chunk == NULL)) { 12672 ctxt->charset = XML_CHAR_ENCODING_NONE; 12673 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 12674 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 12675 size_t cur = ctxt->input->cur - ctxt->input->base; 12676 12677 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 12678 12679 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 12680 #ifdef DEBUG_PUSH 12681 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 12682 #endif 12683 } 12684 12685 if (enc != XML_CHAR_ENCODING_NONE) { 12686 xmlSwitchEncoding(ctxt, enc); 12687 } 12688 12689 return(ctxt); 12690 } 12691 #endif /* LIBXML_PUSH_ENABLED */ 12692 12693 /** 12694 * xmlHaltParser: 12695 * @ctxt: an XML parser context 12696 * 12697 * Blocks further parser processing don't override error 12698 * for internal use 12699 */ 12700 static void 12701 xmlHaltParser(xmlParserCtxtPtr ctxt) { 12702 if (ctxt == NULL) 12703 return; 12704 ctxt->instate = XML_PARSER_EOF; 12705 ctxt->disableSAX = 1; 12706 if (ctxt->input != NULL) { 12707 /* 12708 * in case there was a specific allocation deallocate before 12709 * 
overriding base 12710 */ 12711 if (ctxt->input->free != NULL) { 12712 ctxt->input->free((xmlChar *) ctxt->input->base); 12713 ctxt->input->free = NULL; 12714 } 12715 ctxt->input->cur = BAD_CAST""; 12716 ctxt->input->base = ctxt->input->cur; 12717 } 12718 } 12719 12720 /** 12721 * xmlStopParser: 12722 * @ctxt: an XML parser context 12723 * 12724 * Blocks further parser processing 12725 */ 12726 void 12727 xmlStopParser(xmlParserCtxtPtr ctxt) { 12728 if (ctxt == NULL) 12729 return; 12730 xmlHaltParser(ctxt); 12731 ctxt->errNo = XML_ERR_USER_STOP; 12732 } 12733 12734 /** 12735 * xmlCreateIOParserCtxt: 12736 * @sax: a SAX handler 12737 * @user_data: The user data returned on SAX callbacks 12738 * @ioread: an I/O read function 12739 * @ioclose: an I/O close function 12740 * @ioctx: an I/O handler 12741 * @enc: the charset encoding if known 12742 * 12743 * Create a parser context for using the XML parser with an existing 12744 * I/O stream 12745 * 12746 * Returns the new parser context or NULL 12747 */ 12748 xmlParserCtxtPtr 12749 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 12750 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 12751 void *ioctx, xmlCharEncoding enc) { 12752 xmlParserCtxtPtr ctxt; 12753 xmlParserInputPtr inputStream; 12754 xmlParserInputBufferPtr buf; 12755 12756 if (ioread == NULL) return(NULL); 12757 12758 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 12759 if (buf == NULL) { 12760 if (ioclose != NULL) 12761 ioclose(ioctx); 12762 return (NULL); 12763 } 12764 12765 ctxt = xmlNewParserCtxt(); 12766 if (ctxt == NULL) { 12767 xmlFreeParserInputBuffer(buf); 12768 return(NULL); 12769 } 12770 if (sax != NULL) { 12771 #ifdef LIBXML_SAX1_ENABLED 12772 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12773 #endif /* LIBXML_SAX1_ENABLED */ 12774 xmlFree(ctxt->sax); 12775 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 12776 if (ctxt->sax == NULL) { 12777 xmlErrMemory(ctxt, NULL); 12778 
xmlFreeParserCtxt(ctxt); 12779 return(NULL); 12780 } 12781 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 12782 if (sax->initialized == XML_SAX2_MAGIC) 12783 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 12784 else 12785 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 12786 if (user_data != NULL) 12787 ctxt->userData = user_data; 12788 } 12789 12790 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 12791 if (inputStream == NULL) { 12792 xmlFreeParserCtxt(ctxt); 12793 return(NULL); 12794 } 12795 inputPush(ctxt, inputStream); 12796 12797 return(ctxt); 12798 } 12799 12800 #ifdef LIBXML_VALID_ENABLED 12801 /************************************************************************ 12802 * * 12803 * Front ends when parsing a DTD * 12804 * * 12805 ************************************************************************/ 12806 12807 /** 12808 * xmlIOParseDTD: 12809 * @sax: the SAX handler block or NULL 12810 * @input: an Input Buffer 12811 * @enc: the charset encoding if known 12812 * 12813 * Load and parse a DTD 12814 * 12815 * Returns the resulting xmlDtdPtr or NULL in case of error. 12816 * @input will be freed by the function in any case. 
12817 */ 12818 12819 xmlDtdPtr 12820 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 12821 xmlCharEncoding enc) { 12822 xmlDtdPtr ret = NULL; 12823 xmlParserCtxtPtr ctxt; 12824 xmlParserInputPtr pinput = NULL; 12825 xmlChar start[4]; 12826 12827 if (input == NULL) 12828 return(NULL); 12829 12830 ctxt = xmlNewParserCtxt(); 12831 if (ctxt == NULL) { 12832 xmlFreeParserInputBuffer(input); 12833 return(NULL); 12834 } 12835 12836 /* We are loading a DTD */ 12837 ctxt->options |= XML_PARSE_DTDLOAD; 12838 12839 /* 12840 * Set-up the SAX context 12841 */ 12842 if (sax != NULL) { 12843 if (ctxt->sax != NULL) 12844 xmlFree(ctxt->sax); 12845 ctxt->sax = sax; 12846 ctxt->userData = ctxt; 12847 } 12848 xmlDetectSAX2(ctxt); 12849 12850 /* 12851 * generate a parser input from the I/O handler 12852 */ 12853 12854 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 12855 if (pinput == NULL) { 12856 if (sax != NULL) ctxt->sax = NULL; 12857 xmlFreeParserInputBuffer(input); 12858 xmlFreeParserCtxt(ctxt); 12859 return(NULL); 12860 } 12861 12862 /* 12863 * plug some encoding conversion routines here. 12864 */ 12865 if (xmlPushInput(ctxt, pinput) < 0) { 12866 if (sax != NULL) ctxt->sax = NULL; 12867 xmlFreeParserCtxt(ctxt); 12868 return(NULL); 12869 } 12870 if (enc != XML_CHAR_ENCODING_NONE) { 12871 xmlSwitchEncoding(ctxt, enc); 12872 } 12873 12874 pinput->filename = NULL; 12875 pinput->line = 1; 12876 pinput->col = 1; 12877 pinput->base = ctxt->input->cur; 12878 pinput->cur = ctxt->input->cur; 12879 pinput->free = NULL; 12880 12881 /* 12882 * let's parse that entity knowing it's an external subset. 
12883 */ 12884 ctxt->inSubset = 2; 12885 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12886 if (ctxt->myDoc == NULL) { 12887 xmlErrMemory(ctxt, "New Doc failed"); 12888 return(NULL); 12889 } 12890 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12892 BAD_CAST "none", BAD_CAST "none"); 12893 12894 if ((enc == XML_CHAR_ENCODING_NONE) && 12895 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 12896 /* 12897 * Get the 4 first bytes and decode the charset 12898 * if enc != XML_CHAR_ENCODING_NONE 12899 * plug some encoding conversion routines. 12900 */ 12901 start[0] = RAW; 12902 start[1] = NXT(1); 12903 start[2] = NXT(2); 12904 start[3] = NXT(3); 12905 enc = xmlDetectCharEncoding(start, 4); 12906 if (enc != XML_CHAR_ENCODING_NONE) { 12907 xmlSwitchEncoding(ctxt, enc); 12908 } 12909 } 12910 12911 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 12912 12913 if (ctxt->myDoc != NULL) { 12914 if (ctxt->wellFormed) { 12915 ret = ctxt->myDoc->extSubset; 12916 ctxt->myDoc->extSubset = NULL; 12917 if (ret != NULL) { 12918 xmlNodePtr tmp; 12919 12920 ret->doc = NULL; 12921 tmp = ret->children; 12922 while (tmp != NULL) { 12923 tmp->doc = NULL; 12924 tmp = tmp->next; 12925 } 12926 } 12927 } else { 12928 ret = NULL; 12929 } 12930 xmlFreeDoc(ctxt->myDoc); 12931 ctxt->myDoc = NULL; 12932 } 12933 if (sax != NULL) ctxt->sax = NULL; 12934 xmlFreeParserCtxt(ctxt); 12935 12936 return(ret); 12937 } 12938 12939 /** 12940 * xmlSAXParseDTD: 12941 * @sax: the SAX handler block 12942 * @ExternalID: a NAME* containing the External ID of the DTD 12943 * @SystemID: a NAME* containing the URL to the DTD 12944 * 12945 * Load and parse an external subset. 12946 * 12947 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12948 */ 12949 12950 xmlDtdPtr 12951 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12952 const xmlChar *SystemID) { 12953 xmlDtdPtr ret = NULL; 12954 xmlParserCtxtPtr ctxt; 12955 xmlParserInputPtr input = NULL; 12956 xmlCharEncoding enc; 12957 xmlChar* systemIdCanonic; 12958 12959 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12960 12961 ctxt = xmlNewParserCtxt(); 12962 if (ctxt == NULL) { 12963 return(NULL); 12964 } 12965 12966 /* We are loading a DTD */ 12967 ctxt->options |= XML_PARSE_DTDLOAD; 12968 12969 /* 12970 * Set-up the SAX context 12971 */ 12972 if (sax != NULL) { 12973 if (ctxt->sax != NULL) 12974 xmlFree(ctxt->sax); 12975 ctxt->sax = sax; 12976 ctxt->userData = ctxt; 12977 } 12978 12979 /* 12980 * Canonicalise the system ID 12981 */ 12982 systemIdCanonic = xmlCanonicPath(SystemID); 12983 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12984 xmlFreeParserCtxt(ctxt); 12985 return(NULL); 12986 } 12987 12988 /* 12989 * Ask the Entity resolver to load the damn thing 12990 */ 12991 12992 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12993 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12994 systemIdCanonic); 12995 if (input == NULL) { 12996 if (sax != NULL) ctxt->sax = NULL; 12997 xmlFreeParserCtxt(ctxt); 12998 if (systemIdCanonic != NULL) 12999 xmlFree(systemIdCanonic); 13000 return(NULL); 13001 } 13002 13003 /* 13004 * plug some encoding conversion routines here. 
13005 */ 13006 if (xmlPushInput(ctxt, input) < 0) { 13007 if (sax != NULL) ctxt->sax = NULL; 13008 xmlFreeParserCtxt(ctxt); 13009 if (systemIdCanonic != NULL) 13010 xmlFree(systemIdCanonic); 13011 return(NULL); 13012 } 13013 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13014 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 13015 xmlSwitchEncoding(ctxt, enc); 13016 } 13017 13018 if (input->filename == NULL) 13019 input->filename = (char *) systemIdCanonic; 13020 else 13021 xmlFree(systemIdCanonic); 13022 input->line = 1; 13023 input->col = 1; 13024 input->base = ctxt->input->cur; 13025 input->cur = ctxt->input->cur; 13026 input->free = NULL; 13027 13028 /* 13029 * let's parse that entity knowing it's an external subset. 13030 */ 13031 ctxt->inSubset = 2; 13032 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 13033 if (ctxt->myDoc == NULL) { 13034 xmlErrMemory(ctxt, "New Doc failed"); 13035 if (sax != NULL) ctxt->sax = NULL; 13036 xmlFreeParserCtxt(ctxt); 13037 return(NULL); 13038 } 13039 ctxt->myDoc->properties = XML_DOC_INTERNAL; 13040 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 13041 ExternalID, SystemID); 13042 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 13043 13044 if (ctxt->myDoc != NULL) { 13045 if (ctxt->wellFormed) { 13046 ret = ctxt->myDoc->extSubset; 13047 ctxt->myDoc->extSubset = NULL; 13048 if (ret != NULL) { 13049 xmlNodePtr tmp; 13050 13051 ret->doc = NULL; 13052 tmp = ret->children; 13053 while (tmp != NULL) { 13054 tmp->doc = NULL; 13055 tmp = tmp->next; 13056 } 13057 } 13058 } else { 13059 ret = NULL; 13060 } 13061 xmlFreeDoc(ctxt->myDoc); 13062 ctxt->myDoc = NULL; 13063 } 13064 if (sax != NULL) ctxt->sax = NULL; 13065 xmlFreeParserCtxt(ctxt); 13066 13067 return(ret); 13068 } 13069 13070 13071 /** 13072 * xmlParseDTD: 13073 * @ExternalID: a NAME* containing the External ID of the DTD 13074 * @SystemID: a NAME* containing the URL to the DTD 13075 * 13076 * Load and parse an external subset. 
13077 * 13078 * Returns the resulting xmlDtdPtr or NULL in case of error. 13079 */ 13080 13081 xmlDtdPtr 13082 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 13083 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 13084 } 13085 #endif /* LIBXML_VALID_ENABLED */ 13086 13087 /************************************************************************ 13088 * * 13089 * Front ends when parsing an Entity * 13090 * * 13091 ************************************************************************/ 13092 13093 /** 13094 * xmlParseCtxtExternalEntity: 13095 * @ctx: the existing parsing context 13096 * @URL: the URL for the entity to load 13097 * @ID: the System ID for the entity to load 13098 * @lst: the return value for the set of parsed nodes 13099 * 13100 * Parse an external general entity within an existing parsing context 13101 * An external general parsed entity is well-formed if it matches the 13102 * production labeled extParsedEnt. 13103 * 13104 * [78] extParsedEnt ::= TextDecl? 
content 13105 * 13106 * Returns 0 if the entity is well formed, -1 in case of args problem and 13107 * the parser error code otherwise 13108 */ 13109 13110 int 13111 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 13112 const xmlChar *ID, xmlNodePtr *lst) { 13113 xmlParserCtxtPtr ctxt; 13114 xmlDocPtr newDoc; 13115 xmlNodePtr newRoot; 13116 xmlSAXHandlerPtr oldsax = NULL; 13117 int ret = 0; 13118 xmlChar start[4]; 13119 xmlCharEncoding enc; 13120 13121 if (ctx == NULL) return(-1); 13122 13123 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 13124 (ctx->depth > 1024)) { 13125 return(XML_ERR_ENTITY_LOOP); 13126 } 13127 13128 if (lst != NULL) 13129 *lst = NULL; 13130 if ((URL == NULL) && (ID == NULL)) 13131 return(-1); 13132 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 13133 return(-1); 13134 13135 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 13136 if (ctxt == NULL) { 13137 return(-1); 13138 } 13139 13140 oldsax = ctxt->sax; 13141 ctxt->sax = ctx->sax; 13142 xmlDetectSAX2(ctxt); 13143 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13144 if (newDoc == NULL) { 13145 xmlFreeParserCtxt(ctxt); 13146 return(-1); 13147 } 13148 newDoc->properties = XML_DOC_INTERNAL; 13149 if (ctx->myDoc->dict) { 13150 newDoc->dict = ctx->myDoc->dict; 13151 xmlDictReference(newDoc->dict); 13152 } 13153 if (ctx->myDoc != NULL) { 13154 newDoc->intSubset = ctx->myDoc->intSubset; 13155 newDoc->extSubset = ctx->myDoc->extSubset; 13156 } 13157 if (ctx->myDoc->URL != NULL) { 13158 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 13159 } 13160 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13161 if (newRoot == NULL) { 13162 ctxt->sax = oldsax; 13163 xmlFreeParserCtxt(ctxt); 13164 newDoc->intSubset = NULL; 13165 newDoc->extSubset = NULL; 13166 xmlFreeDoc(newDoc); 13167 return(-1); 13168 } 13169 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13170 nodePush(ctxt, newDoc->children); 13171 if (ctx->myDoc == NULL) { 13172 
ctxt->myDoc = newDoc; 13173 } else { 13174 ctxt->myDoc = ctx->myDoc; 13175 newDoc->children->doc = ctx->myDoc; 13176 } 13177 13178 /* 13179 * Get the 4 first bytes and decode the charset 13180 * if enc != XML_CHAR_ENCODING_NONE 13181 * plug some encoding conversion routines. 13182 */ 13183 GROW 13184 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13185 start[0] = RAW; 13186 start[1] = NXT(1); 13187 start[2] = NXT(2); 13188 start[3] = NXT(3); 13189 enc = xmlDetectCharEncoding(start, 4); 13190 if (enc != XML_CHAR_ENCODING_NONE) { 13191 xmlSwitchEncoding(ctxt, enc); 13192 } 13193 } 13194 13195 /* 13196 * Parse a possible text declaration first 13197 */ 13198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13199 xmlParseTextDecl(ctxt); 13200 /* 13201 * An XML-1.0 document can't reference an entity not XML-1.0 13202 */ 13203 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 13204 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 13205 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 13206 "Version mismatch between document and entity\n"); 13207 } 13208 } 13209 13210 /* 13211 * If the user provided its own SAX callbacks then reuse the 13212 * useData callback field, otherwise the expected setup in a 13213 * DOM builder is to have userData == ctxt 13214 */ 13215 if (ctx->userData == ctx) 13216 ctxt->userData = ctxt; 13217 else 13218 ctxt->userData = ctx->userData; 13219 13220 /* 13221 * Doing validity checking on chunk doesn't make sense 13222 */ 13223 ctxt->instate = XML_PARSER_CONTENT; 13224 ctxt->validate = ctx->validate; 13225 ctxt->valid = ctx->valid; 13226 ctxt->loadsubset = ctx->loadsubset; 13227 ctxt->depth = ctx->depth + 1; 13228 ctxt->replaceEntities = ctx->replaceEntities; 13229 if (ctxt->validate) { 13230 ctxt->vctxt.error = ctx->vctxt.error; 13231 ctxt->vctxt.warning = ctx->vctxt.warning; 13232 } else { 13233 ctxt->vctxt.error = NULL; 13234 ctxt->vctxt.warning = NULL; 13235 } 13236 ctxt->vctxt.nodeTab = NULL; 13237 
ctxt->vctxt.nodeNr = 0; 13238 ctxt->vctxt.nodeMax = 0; 13239 ctxt->vctxt.node = NULL; 13240 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13241 ctxt->dict = ctx->dict; 13242 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13243 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13244 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13245 ctxt->dictNames = ctx->dictNames; 13246 ctxt->attsDefault = ctx->attsDefault; 13247 ctxt->attsSpecial = ctx->attsSpecial; 13248 ctxt->linenumbers = ctx->linenumbers; 13249 13250 xmlParseContent(ctxt); 13251 13252 ctx->validate = ctxt->validate; 13253 ctx->valid = ctxt->valid; 13254 if ((RAW == '<') && (NXT(1) == '/')) { 13255 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13256 } else if (RAW != 0) { 13257 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13258 } 13259 if (ctxt->node != newDoc->children) { 13260 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13261 } 13262 13263 if (!ctxt->wellFormed) { 13264 if (ctxt->errNo == 0) 13265 ret = 1; 13266 else 13267 ret = ctxt->errNo; 13268 } else { 13269 if (lst != NULL) { 13270 xmlNodePtr cur; 13271 13272 /* 13273 * Return the newly created nodeset after unlinking it from 13274 * they pseudo parent. 
13275 */ 13276 cur = newDoc->children->children; 13277 *lst = cur; 13278 while (cur != NULL) { 13279 cur->parent = NULL; 13280 cur = cur->next; 13281 } 13282 newDoc->children->children = NULL; 13283 } 13284 ret = 0; 13285 } 13286 ctxt->sax = oldsax; 13287 ctxt->dict = NULL; 13288 ctxt->attsDefault = NULL; 13289 ctxt->attsSpecial = NULL; 13290 xmlFreeParserCtxt(ctxt); 13291 newDoc->intSubset = NULL; 13292 newDoc->extSubset = NULL; 13293 xmlFreeDoc(newDoc); 13294 13295 return(ret); 13296 } 13297 13298 /** 13299 * xmlParseExternalEntityPrivate: 13300 * @doc: the document the chunk pertains to 13301 * @oldctxt: the previous parser context if available 13302 * @sax: the SAX handler bloc (possibly NULL) 13303 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13304 * @depth: Used for loop detection, use 0 13305 * @URL: the URL for the entity to load 13306 * @ID: the System ID for the entity to load 13307 * @list: the return value for the set of parsed nodes 13308 * 13309 * Private version of xmlParseExternalEntity() 13310 * 13311 * Returns 0 if the entity is well formed, -1 in case of args problem and 13312 * the parser error code otherwise 13313 */ 13314 13315 static xmlParserErrors 13316 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 13317 xmlSAXHandlerPtr sax, 13318 void *user_data, int depth, const xmlChar *URL, 13319 const xmlChar *ID, xmlNodePtr *list) { 13320 xmlParserCtxtPtr ctxt; 13321 xmlDocPtr newDoc; 13322 xmlNodePtr newRoot; 13323 xmlSAXHandlerPtr oldsax = NULL; 13324 xmlParserErrors ret = XML_ERR_OK; 13325 xmlChar start[4]; 13326 xmlCharEncoding enc; 13327 13328 if (((depth > 40) && 13329 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 13330 (depth > 1024)) { 13331 return(XML_ERR_ENTITY_LOOP); 13332 } 13333 13334 if (list != NULL) 13335 *list = NULL; 13336 if ((URL == NULL) && (ID == NULL)) 13337 return(XML_ERR_INTERNAL_ERROR); 13338 if (doc == NULL) 13339 return(XML_ERR_INTERNAL_ERROR); 13340 
13341 13342 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 13343 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13344 ctxt->userData = ctxt; 13345 if (oldctxt != NULL) { 13346 ctxt->_private = oldctxt->_private; 13347 ctxt->loadsubset = oldctxt->loadsubset; 13348 ctxt->validate = oldctxt->validate; 13349 ctxt->external = oldctxt->external; 13350 ctxt->record_info = oldctxt->record_info; 13351 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 13352 ctxt->node_seq.length = oldctxt->node_seq.length; 13353 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 13354 } else { 13355 /* 13356 * Doing validity checking on chunk without context 13357 * doesn't make sense 13358 */ 13359 ctxt->_private = NULL; 13360 ctxt->validate = 0; 13361 ctxt->external = 2; 13362 ctxt->loadsubset = 0; 13363 } 13364 if (sax != NULL) { 13365 oldsax = ctxt->sax; 13366 ctxt->sax = sax; 13367 if (user_data != NULL) 13368 ctxt->userData = user_data; 13369 } 13370 xmlDetectSAX2(ctxt); 13371 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13372 if (newDoc == NULL) { 13373 ctxt->node_seq.maximum = 0; 13374 ctxt->node_seq.length = 0; 13375 ctxt->node_seq.buffer = NULL; 13376 xmlFreeParserCtxt(ctxt); 13377 return(XML_ERR_INTERNAL_ERROR); 13378 } 13379 newDoc->properties = XML_DOC_INTERNAL; 13380 newDoc->intSubset = doc->intSubset; 13381 newDoc->extSubset = doc->extSubset; 13382 newDoc->dict = doc->dict; 13383 xmlDictReference(newDoc->dict); 13384 13385 if (doc->URL != NULL) { 13386 newDoc->URL = xmlStrdup(doc->URL); 13387 } 13388 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13389 if (newRoot == NULL) { 13390 if (sax != NULL) 13391 ctxt->sax = oldsax; 13392 ctxt->node_seq.maximum = 0; 13393 ctxt->node_seq.length = 0; 13394 ctxt->node_seq.buffer = NULL; 13395 xmlFreeParserCtxt(ctxt); 13396 newDoc->intSubset = NULL; 13397 newDoc->extSubset = NULL; 13398 xmlFreeDoc(newDoc); 13399 return(XML_ERR_INTERNAL_ERROR); 13400 } 13401 xmlAddChild((xmlNodePtr) newDoc, newRoot); 
13402 nodePush(ctxt, newDoc->children); 13403 ctxt->myDoc = doc; 13404 newRoot->doc = doc; 13405 13406 /* 13407 * Get the 4 first bytes and decode the charset 13408 * if enc != XML_CHAR_ENCODING_NONE 13409 * plug some encoding conversion routines. 13410 */ 13411 GROW; 13412 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 13413 start[0] = RAW; 13414 start[1] = NXT(1); 13415 start[2] = NXT(2); 13416 start[3] = NXT(3); 13417 enc = xmlDetectCharEncoding(start, 4); 13418 if (enc != XML_CHAR_ENCODING_NONE) { 13419 xmlSwitchEncoding(ctxt, enc); 13420 } 13421 } 13422 13423 /* 13424 * Parse a possible text declaration first 13425 */ 13426 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 13427 xmlParseTextDecl(ctxt); 13428 } 13429 13430 ctxt->instate = XML_PARSER_CONTENT; 13431 ctxt->depth = depth; 13432 13433 xmlParseContent(ctxt); 13434 13435 if ((RAW == '<') && (NXT(1) == '/')) { 13436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13437 } else if (RAW != 0) { 13438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13439 } 13440 if (ctxt->node != newDoc->children) { 13441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13442 } 13443 13444 if (!ctxt->wellFormed) { 13445 if (ctxt->errNo == 0) 13446 ret = XML_ERR_INTERNAL_ERROR; 13447 else 13448 ret = (xmlParserErrors)ctxt->errNo; 13449 } else { 13450 if (list != NULL) { 13451 xmlNodePtr cur; 13452 13453 /* 13454 * Return the newly created nodeset after unlinking it from 13455 * they pseudo parent. 13456 */ 13457 cur = newDoc->children->children; 13458 *list = cur; 13459 while (cur != NULL) { 13460 cur->parent = NULL; 13461 cur = cur->next; 13462 } 13463 newDoc->children->children = NULL; 13464 } 13465 ret = XML_ERR_OK; 13466 } 13467 13468 /* 13469 * Record in the parent context the number of entities replacement 13470 * done when parsing that reference. 
13471 */ 13472 if (oldctxt != NULL) 13473 oldctxt->nbentities += ctxt->nbentities; 13474 13475 /* 13476 * Also record the size of the entity parsed 13477 */ 13478 if (ctxt->input != NULL && oldctxt != NULL) { 13479 oldctxt->sizeentities += ctxt->input->consumed; 13480 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 13481 } 13482 /* 13483 * And record the last error if any 13484 */ 13485 if (ctxt->lastError.code != XML_ERR_OK) 13486 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13487 13488 if (sax != NULL) 13489 ctxt->sax = oldsax; 13490 if (oldctxt != NULL) { 13491 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 13492 oldctxt->node_seq.length = ctxt->node_seq.length; 13493 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 13494 } 13495 ctxt->node_seq.maximum = 0; 13496 ctxt->node_seq.length = 0; 13497 ctxt->node_seq.buffer = NULL; 13498 xmlFreeParserCtxt(ctxt); 13499 newDoc->intSubset = NULL; 13500 newDoc->extSubset = NULL; 13501 xmlFreeDoc(newDoc); 13502 13503 return(ret); 13504 } 13505 13506 #ifdef LIBXML_SAX1_ENABLED 13507 /** 13508 * xmlParseExternalEntity: 13509 * @doc: the document the chunk pertains to 13510 * @sax: the SAX handler bloc (possibly NULL) 13511 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13512 * @depth: Used for loop detection, use 0 13513 * @URL: the URL for the entity to load 13514 * @ID: the System ID for the entity to load 13515 * @lst: the return value for the set of parsed nodes 13516 * 13517 * Parse an external general entity 13518 * An external general parsed entity is well-formed if it matches the 13519 * production labeled extParsedEnt. 13520 * 13521 * [78] extParsedEnt ::= TextDecl? 
content 13522 * 13523 * Returns 0 if the entity is well formed, -1 in case of args problem and 13524 * the parser error code otherwise 13525 */ 13526 13527 int 13528 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 13529 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 13530 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 13531 ID, lst)); 13532 } 13533 13534 /** 13535 * xmlParseBalancedChunkMemory: 13536 * @doc: the document the chunk pertains to 13537 * @sax: the SAX handler bloc (possibly NULL) 13538 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13539 * @depth: Used for loop detection, use 0 13540 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13541 * @lst: the return value for the set of parsed nodes 13542 * 13543 * Parse a well-balanced chunk of an XML document 13544 * called by the parser 13545 * The allowed sequence for the Well Balanced Chunk is the one defined by 13546 * the content production in the XML grammar: 13547 * 13548 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13549 * 13550 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13551 * the parser error code otherwise 13552 */ 13553 13554 int 13555 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13556 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 13557 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 13558 depth, string, lst, 0 ); 13559 } 13560 #endif /* LIBXML_SAX1_ENABLED */ 13561 13562 /** 13563 * xmlParseBalancedChunkMemoryInternal: 13564 * @oldctxt: the existing parsing context 13565 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13566 * @user_data: the user data field for the parser context 13567 * @lst: the return value for the set of parsed nodes 13568 * 13569 * 13570 * Parse a well-balanced chunk of an XML document 13571 * called by the parser 
13572 * The allowed sequence for the Well Balanced Chunk is the one defined by 13573 * the content production in the XML grammar: 13574 * 13575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13576 * 13577 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13578 * error code otherwise 13579 * 13580 * In case recover is set to 1, the nodelist will not be empty even if 13581 * the parsed chunk is not well balanced. 13582 */ 13583 static xmlParserErrors 13584 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 13585 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 13586 xmlParserCtxtPtr ctxt; 13587 xmlDocPtr newDoc = NULL; 13588 xmlNodePtr newRoot; 13589 xmlSAXHandlerPtr oldsax = NULL; 13590 xmlNodePtr content = NULL; 13591 xmlNodePtr last = NULL; 13592 int size; 13593 xmlParserErrors ret = XML_ERR_OK; 13594 #ifdef SAX2 13595 int i; 13596 #endif 13597 13598 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 13599 (oldctxt->depth > 1024)) { 13600 return(XML_ERR_ENTITY_LOOP); 13601 } 13602 13603 13604 if (lst != NULL) 13605 *lst = NULL; 13606 if (string == NULL) 13607 return(XML_ERR_INTERNAL_ERROR); 13608 13609 size = xmlStrlen(string); 13610 13611 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13612 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 13613 if (user_data != NULL) 13614 ctxt->userData = user_data; 13615 else 13616 ctxt->userData = ctxt; 13617 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 13618 ctxt->dict = oldctxt->dict; 13619 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13620 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13621 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13622 13623 #ifdef SAX2 13624 /* propagate namespaces down the entity */ 13625 for (i = 0;i < oldctxt->nsNr;i += 2) { 13626 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 13627 } 13628 #endif 13629 13630 oldsax = 
ctxt->sax; 13631 ctxt->sax = oldctxt->sax; 13632 xmlDetectSAX2(ctxt); 13633 ctxt->replaceEntities = oldctxt->replaceEntities; 13634 ctxt->options = oldctxt->options; 13635 13636 ctxt->_private = oldctxt->_private; 13637 if (oldctxt->myDoc == NULL) { 13638 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13639 if (newDoc == NULL) { 13640 ctxt->sax = oldsax; 13641 ctxt->dict = NULL; 13642 xmlFreeParserCtxt(ctxt); 13643 return(XML_ERR_INTERNAL_ERROR); 13644 } 13645 newDoc->properties = XML_DOC_INTERNAL; 13646 newDoc->dict = ctxt->dict; 13647 xmlDictReference(newDoc->dict); 13648 ctxt->myDoc = newDoc; 13649 } else { 13650 ctxt->myDoc = oldctxt->myDoc; 13651 content = ctxt->myDoc->children; 13652 last = ctxt->myDoc->last; 13653 } 13654 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 13655 if (newRoot == NULL) { 13656 ctxt->sax = oldsax; 13657 ctxt->dict = NULL; 13658 xmlFreeParserCtxt(ctxt); 13659 if (newDoc != NULL) { 13660 xmlFreeDoc(newDoc); 13661 } 13662 return(XML_ERR_INTERNAL_ERROR); 13663 } 13664 ctxt->myDoc->children = NULL; 13665 ctxt->myDoc->last = NULL; 13666 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 13667 nodePush(ctxt, ctxt->myDoc->children); 13668 ctxt->instate = XML_PARSER_CONTENT; 13669 ctxt->depth = oldctxt->depth + 1; 13670 13671 ctxt->validate = 0; 13672 ctxt->loadsubset = oldctxt->loadsubset; 13673 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 13674 /* 13675 * ID/IDREF registration will be done in xmlValidateElement below 13676 */ 13677 ctxt->loadsubset |= XML_SKIP_IDS; 13678 } 13679 ctxt->dictNames = oldctxt->dictNames; 13680 ctxt->attsDefault = oldctxt->attsDefault; 13681 ctxt->attsSpecial = oldctxt->attsSpecial; 13682 13683 xmlParseContent(ctxt); 13684 if ((RAW == '<') && (NXT(1) == '/')) { 13685 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13686 } else if (RAW != 0) { 13687 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13688 } 13689 if (ctxt->node != ctxt->myDoc->children) { 13690 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 13691 } 13692 13693 if (!ctxt->wellFormed) { 13694 if (ctxt->errNo == 0) 13695 ret = XML_ERR_INTERNAL_ERROR; 13696 else 13697 ret = (xmlParserErrors)ctxt->errNo; 13698 } else { 13699 ret = XML_ERR_OK; 13700 } 13701 13702 if ((lst != NULL) && (ret == XML_ERR_OK)) { 13703 xmlNodePtr cur; 13704 13705 /* 13706 * Return the newly created nodeset after unlinking it from 13707 * they pseudo parent. 13708 */ 13709 cur = ctxt->myDoc->children->children; 13710 *lst = cur; 13711 while (cur != NULL) { 13712 #ifdef LIBXML_VALID_ENABLED 13713 if ((oldctxt->validate) && (oldctxt->wellFormed) && 13714 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 13715 (cur->type == XML_ELEMENT_NODE)) { 13716 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 13717 oldctxt->myDoc, cur); 13718 } 13719 #endif /* LIBXML_VALID_ENABLED */ 13720 cur->parent = NULL; 13721 cur = cur->next; 13722 } 13723 ctxt->myDoc->children->children = NULL; 13724 } 13725 if (ctxt->myDoc != NULL) { 13726 xmlFreeNode(ctxt->myDoc->children); 13727 ctxt->myDoc->children = content; 13728 ctxt->myDoc->last = last; 13729 } 13730 13731 /* 13732 * Record in the parent context the number of entities replacement 13733 * done when parsing that reference. 
13734 */ 13735 if (oldctxt != NULL) 13736 oldctxt->nbentities += ctxt->nbentities; 13737 13738 /* 13739 * Also record the last error if any 13740 */ 13741 if (ctxt->lastError.code != XML_ERR_OK) 13742 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 13743 13744 ctxt->sax = oldsax; 13745 ctxt->dict = NULL; 13746 ctxt->attsDefault = NULL; 13747 ctxt->attsSpecial = NULL; 13748 xmlFreeParserCtxt(ctxt); 13749 if (newDoc != NULL) { 13750 xmlFreeDoc(newDoc); 13751 } 13752 13753 return(ret); 13754 } 13755 13756 /** 13757 * xmlParseInNodeContext: 13758 * @node: the context node 13759 * @data: the input string 13760 * @datalen: the input string length in bytes 13761 * @options: a combination of xmlParserOption 13762 * @lst: the return value for the set of parsed nodes 13763 * 13764 * Parse a well-balanced chunk of an XML document 13765 * within the context (DTD, namespaces, etc ...) of the given node. 13766 * 13767 * The allowed sequence for the data is a Well Balanced Chunk defined by 13768 * the content production in the XML grammar: 13769 * 13770 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13771 * 13772 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 13773 * error code otherwise 13774 */ 13775 xmlParserErrors 13776 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 13777 int options, xmlNodePtr *lst) { 13778 #ifdef SAX2 13779 xmlParserCtxtPtr ctxt; 13780 xmlDocPtr doc = NULL; 13781 xmlNodePtr fake, cur; 13782 int nsnr = 0; 13783 13784 xmlParserErrors ret = XML_ERR_OK; 13785 13786 /* 13787 * check all input parameters, grab the document 13788 */ 13789 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 13790 return(XML_ERR_INTERNAL_ERROR); 13791 switch (node->type) { 13792 case XML_ELEMENT_NODE: 13793 case XML_ATTRIBUTE_NODE: 13794 case XML_TEXT_NODE: 13795 case XML_CDATA_SECTION_NODE: 13796 case XML_ENTITY_REF_NODE: 13797 case XML_PI_NODE: 13798 case XML_COMMENT_NODE: 13799 case 
XML_DOCUMENT_NODE: 13800 case XML_HTML_DOCUMENT_NODE: 13801 break; 13802 default: 13803 return(XML_ERR_INTERNAL_ERROR); 13804 13805 } 13806 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 13807 (node->type != XML_DOCUMENT_NODE) && 13808 (node->type != XML_HTML_DOCUMENT_NODE)) 13809 node = node->parent; 13810 if (node == NULL) 13811 return(XML_ERR_INTERNAL_ERROR); 13812 if (node->type == XML_ELEMENT_NODE) 13813 doc = node->doc; 13814 else 13815 doc = (xmlDocPtr) node; 13816 if (doc == NULL) 13817 return(XML_ERR_INTERNAL_ERROR); 13818 13819 /* 13820 * allocate a context and set-up everything not related to the 13821 * node position in the tree 13822 */ 13823 if (doc->type == XML_DOCUMENT_NODE) 13824 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 13825 #ifdef LIBXML_HTML_ENABLED 13826 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 13827 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 13828 /* 13829 * When parsing in context, it makes no sense to add implied 13830 * elements like html/body/etc... 13831 */ 13832 options |= HTML_PARSE_NOIMPLIED; 13833 } 13834 #endif 13835 else 13836 return(XML_ERR_INTERNAL_ERROR); 13837 13838 if (ctxt == NULL) 13839 return(XML_ERR_NO_MEMORY); 13840 13841 /* 13842 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 13843 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 13844 * we must wait until the last moment to free the original one. 
13845 */ 13846 if (doc->dict != NULL) { 13847 if (ctxt->dict != NULL) 13848 xmlDictFree(ctxt->dict); 13849 ctxt->dict = doc->dict; 13850 } else 13851 options |= XML_PARSE_NODICT; 13852 13853 if (doc->encoding != NULL) { 13854 xmlCharEncodingHandlerPtr hdlr; 13855 13856 if (ctxt->encoding != NULL) 13857 xmlFree((xmlChar *) ctxt->encoding); 13858 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 13859 13860 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding); 13861 if (hdlr != NULL) { 13862 xmlSwitchToEncoding(ctxt, hdlr); 13863 } else { 13864 return(XML_ERR_UNSUPPORTED_ENCODING); 13865 } 13866 } 13867 13868 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13869 xmlDetectSAX2(ctxt); 13870 ctxt->myDoc = doc; 13871 /* parsing in context, i.e. as within existing content */ 13872 ctxt->instate = XML_PARSER_CONTENT; 13873 13874 fake = xmlNewComment(NULL); 13875 if (fake == NULL) { 13876 xmlFreeParserCtxt(ctxt); 13877 return(XML_ERR_NO_MEMORY); 13878 } 13879 xmlAddChild(node, fake); 13880 13881 if (node->type == XML_ELEMENT_NODE) { 13882 nodePush(ctxt, node); 13883 /* 13884 * initialize the SAX2 namespaces stack 13885 */ 13886 cur = node; 13887 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 13888 xmlNsPtr ns = cur->nsDef; 13889 const xmlChar *iprefix, *ihref; 13890 13891 while (ns != NULL) { 13892 if (ctxt->dict) { 13893 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 13894 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 13895 } else { 13896 iprefix = ns->prefix; 13897 ihref = ns->href; 13898 } 13899 13900 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 13901 nsPush(ctxt, iprefix, ihref); 13902 nsnr++; 13903 } 13904 ns = ns->next; 13905 } 13906 cur = cur->parent; 13907 } 13908 } 13909 13910 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 13911 /* 13912 * ID/IDREF registration will be done in xmlValidateElement below 13913 */ 13914 ctxt->loadsubset |= XML_SKIP_IDS; 13915 } 13916 13917 #ifdef LIBXML_HTML_ENABLED 13918 if 
(doc->type == XML_HTML_DOCUMENT_NODE) 13919 __htmlParseContent(ctxt); 13920 else 13921 #endif 13922 xmlParseContent(ctxt); 13923 13924 nsPop(ctxt, nsnr); 13925 if ((RAW == '<') && (NXT(1) == '/')) { 13926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13927 } else if (RAW != 0) { 13928 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13929 } 13930 if ((ctxt->node != NULL) && (ctxt->node != node)) { 13931 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13932 ctxt->wellFormed = 0; 13933 } 13934 13935 if (!ctxt->wellFormed) { 13936 if (ctxt->errNo == 0) 13937 ret = XML_ERR_INTERNAL_ERROR; 13938 else 13939 ret = (xmlParserErrors)ctxt->errNo; 13940 } else { 13941 ret = XML_ERR_OK; 13942 } 13943 13944 /* 13945 * Return the newly created nodeset after unlinking it from 13946 * the pseudo sibling. 13947 */ 13948 13949 cur = fake->next; 13950 fake->next = NULL; 13951 node->last = fake; 13952 13953 if (cur != NULL) { 13954 cur->prev = NULL; 13955 } 13956 13957 *lst = cur; 13958 13959 while (cur != NULL) { 13960 cur->parent = NULL; 13961 cur = cur->next; 13962 } 13963 13964 xmlUnlinkNode(fake); 13965 xmlFreeNode(fake); 13966 13967 13968 if (ret != XML_ERR_OK) { 13969 xmlFreeNodeList(*lst); 13970 *lst = NULL; 13971 } 13972 13973 if (doc->dict != NULL) 13974 ctxt->dict = NULL; 13975 xmlFreeParserCtxt(ctxt); 13976 13977 return(ret); 13978 #else /* !SAX2 */ 13979 return(XML_ERR_INTERNAL_ERROR); 13980 #endif 13981 } 13982 13983 #ifdef LIBXML_SAX1_ENABLED 13984 /** 13985 * xmlParseBalancedChunkMemoryRecover: 13986 * @doc: the document the chunk pertains to 13987 * @sax: the SAX handler bloc (possibly NULL) 13988 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13989 * @depth: Used for loop detection, use 0 13990 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13991 * @lst: the return value for the set of parsed nodes 13992 * @recover: return nodes even if the data is broken (use 0) 13993 * 13994 * 13995 * Parse a well-balanced chunk 
of an XML document 13996 * called by the parser 13997 * The allowed sequence for the Well Balanced Chunk is the one defined by 13998 * the content production in the XML grammar: 13999 * 14000 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 14001 * 14002 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 14003 * the parser error code otherwise 14004 * 14005 * In case recover is set to 1, the nodelist will not be empty even if 14006 * the parsed chunk is not well balanced, assuming the parsing succeeded to 14007 * some extent. 14008 */ 14009 int 14010 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 14011 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 14012 int recover) { 14013 xmlParserCtxtPtr ctxt; 14014 xmlDocPtr newDoc; 14015 xmlSAXHandlerPtr oldsax = NULL; 14016 xmlNodePtr content, newRoot; 14017 int size; 14018 int ret = 0; 14019 14020 if (depth > 40) { 14021 return(XML_ERR_ENTITY_LOOP); 14022 } 14023 14024 14025 if (lst != NULL) 14026 *lst = NULL; 14027 if (string == NULL) 14028 return(-1); 14029 14030 size = xmlStrlen(string); 14031 14032 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 14033 if (ctxt == NULL) return(-1); 14034 ctxt->userData = ctxt; 14035 if (sax != NULL) { 14036 oldsax = ctxt->sax; 14037 ctxt->sax = sax; 14038 if (user_data != NULL) 14039 ctxt->userData = user_data; 14040 } 14041 newDoc = xmlNewDoc(BAD_CAST "1.0"); 14042 if (newDoc == NULL) { 14043 xmlFreeParserCtxt(ctxt); 14044 return(-1); 14045 } 14046 newDoc->properties = XML_DOC_INTERNAL; 14047 if ((doc != NULL) && (doc->dict != NULL)) { 14048 xmlDictFree(ctxt->dict); 14049 ctxt->dict = doc->dict; 14050 xmlDictReference(ctxt->dict); 14051 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 14052 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 14053 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 14054 ctxt->dictNames = 1; 14055 } else { 
14056 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 14057 } 14058 if (doc != NULL) { 14059 newDoc->intSubset = doc->intSubset; 14060 newDoc->extSubset = doc->extSubset; 14061 } 14062 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 14063 if (newRoot == NULL) { 14064 if (sax != NULL) 14065 ctxt->sax = oldsax; 14066 xmlFreeParserCtxt(ctxt); 14067 newDoc->intSubset = NULL; 14068 newDoc->extSubset = NULL; 14069 xmlFreeDoc(newDoc); 14070 return(-1); 14071 } 14072 xmlAddChild((xmlNodePtr) newDoc, newRoot); 14073 nodePush(ctxt, newRoot); 14074 if (doc == NULL) { 14075 ctxt->myDoc = newDoc; 14076 } else { 14077 ctxt->myDoc = newDoc; 14078 newDoc->children->doc = doc; 14079 /* Ensure that doc has XML spec namespace */ 14080 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 14081 newDoc->oldNs = doc->oldNs; 14082 } 14083 ctxt->instate = XML_PARSER_CONTENT; 14084 ctxt->depth = depth; 14085 14086 /* 14087 * Doing validity checking on chunk doesn't make sense 14088 */ 14089 ctxt->validate = 0; 14090 ctxt->loadsubset = 0; 14091 xmlDetectSAX2(ctxt); 14092 14093 if ( doc != NULL ){ 14094 content = doc->children; 14095 doc->children = NULL; 14096 xmlParseContent(ctxt); 14097 doc->children = content; 14098 } 14099 else { 14100 xmlParseContent(ctxt); 14101 } 14102 if ((RAW == '<') && (NXT(1) == '/')) { 14103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14104 } else if (RAW != 0) { 14105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 14106 } 14107 if (ctxt->node != newDoc->children) { 14108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 14109 } 14110 14111 if (!ctxt->wellFormed) { 14112 if (ctxt->errNo == 0) 14113 ret = 1; 14114 else 14115 ret = ctxt->errNo; 14116 } else { 14117 ret = 0; 14118 } 14119 14120 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 14121 xmlNodePtr cur; 14122 14123 /* 14124 * Return the newly created nodeset after unlinking it from 14125 * they pseudo parent. 
14126 */ 14127 cur = newDoc->children->children; 14128 *lst = cur; 14129 while (cur != NULL) { 14130 xmlSetTreeDoc(cur, doc); 14131 cur->parent = NULL; 14132 cur = cur->next; 14133 } 14134 newDoc->children->children = NULL; 14135 } 14136 14137 if (sax != NULL) 14138 ctxt->sax = oldsax; 14139 xmlFreeParserCtxt(ctxt); 14140 newDoc->intSubset = NULL; 14141 newDoc->extSubset = NULL; 14142 newDoc->oldNs = NULL; 14143 xmlFreeDoc(newDoc); 14144 14145 return(ret); 14146 } 14147 14148 /** 14149 * xmlSAXParseEntity: 14150 * @sax: the SAX handler block 14151 * @filename: the filename 14152 * 14153 * parse an XML external entity out of context and build a tree. 14154 * It use the given SAX function block to handle the parsing callback. 14155 * If sax is NULL, fallback to the default DOM tree building routines. 14156 * 14157 * [78] extParsedEnt ::= TextDecl? content 14158 * 14159 * This correspond to a "Well Balanced" chunk 14160 * 14161 * Returns the resulting document tree 14162 */ 14163 14164 xmlDocPtr 14165 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 14166 xmlDocPtr ret; 14167 xmlParserCtxtPtr ctxt; 14168 14169 ctxt = xmlCreateFileParserCtxt(filename); 14170 if (ctxt == NULL) { 14171 return(NULL); 14172 } 14173 if (sax != NULL) { 14174 if (ctxt->sax != NULL) 14175 xmlFree(ctxt->sax); 14176 ctxt->sax = sax; 14177 ctxt->userData = NULL; 14178 } 14179 14180 xmlParseExtParsedEnt(ctxt); 14181 14182 if (ctxt->wellFormed) 14183 ret = ctxt->myDoc; 14184 else { 14185 ret = NULL; 14186 xmlFreeDoc(ctxt->myDoc); 14187 ctxt->myDoc = NULL; 14188 } 14189 if (sax != NULL) 14190 ctxt->sax = NULL; 14191 xmlFreeParserCtxt(ctxt); 14192 14193 return(ret); 14194 } 14195 14196 /** 14197 * xmlParseEntity: 14198 * @filename: the filename 14199 * 14200 * parse an XML external entity out of context and build a tree. 14201 * 14202 * [78] extParsedEnt ::= TextDecl? 
content 14203 * 14204 * This correspond to a "Well Balanced" chunk 14205 * 14206 * Returns the resulting document tree 14207 */ 14208 14209 xmlDocPtr 14210 xmlParseEntity(const char *filename) { 14211 return(xmlSAXParseEntity(NULL, filename)); 14212 } 14213 #endif /* LIBXML_SAX1_ENABLED */ 14214 14215 /** 14216 * xmlCreateEntityParserCtxtInternal: 14217 * @URL: the entity URL 14218 * @ID: the entity PUBLIC ID 14219 * @base: a possible base for the target URI 14220 * @pctx: parser context used to set options on new context 14221 * 14222 * Create a parser context for an external entity 14223 * Automatic support for ZLIB/Compress compressed document is provided 14224 * by default if found at compile-time. 14225 * 14226 * Returns the new parser context or NULL 14227 */ 14228 static xmlParserCtxtPtr 14229 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 14230 const xmlChar *base, xmlParserCtxtPtr pctx) { 14231 xmlParserCtxtPtr ctxt; 14232 xmlParserInputPtr inputStream; 14233 char *directory = NULL; 14234 xmlChar *uri; 14235 14236 ctxt = xmlNewParserCtxt(); 14237 if (ctxt == NULL) { 14238 return(NULL); 14239 } 14240 14241 if (pctx != NULL) { 14242 ctxt->options = pctx->options; 14243 ctxt->_private = pctx->_private; 14244 } 14245 14246 uri = xmlBuildURI(URL, base); 14247 14248 if (uri == NULL) { 14249 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 14250 if (inputStream == NULL) { 14251 xmlFreeParserCtxt(ctxt); 14252 return(NULL); 14253 } 14254 14255 inputPush(ctxt, inputStream); 14256 14257 if ((ctxt->directory == NULL) && (directory == NULL)) 14258 directory = xmlParserGetDirectory((char *)URL); 14259 if ((ctxt->directory == NULL) && (directory != NULL)) 14260 ctxt->directory = directory; 14261 } else { 14262 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 14263 if (inputStream == NULL) { 14264 xmlFree(uri); 14265 xmlFreeParserCtxt(ctxt); 14266 return(NULL); 14267 } 14268 14269 inputPush(ctxt, 
inputStream); 14270 14271 if ((ctxt->directory == NULL) && (directory == NULL)) 14272 directory = xmlParserGetDirectory((char *)uri); 14273 if ((ctxt->directory == NULL) && (directory != NULL)) 14274 ctxt->directory = directory; 14275 xmlFree(uri); 14276 } 14277 return(ctxt); 14278 } 14279 14280 /** 14281 * xmlCreateEntityParserCtxt: 14282 * @URL: the entity URL 14283 * @ID: the entity PUBLIC ID 14284 * @base: a possible base for the target URI 14285 * 14286 * Create a parser context for an external entity 14287 * Automatic support for ZLIB/Compress compressed document is provided 14288 * by default if found at compile-time. 14289 * 14290 * Returns the new parser context or NULL 14291 */ 14292 xmlParserCtxtPtr 14293 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 14294 const xmlChar *base) { 14295 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 14296 14297 } 14298 14299 /************************************************************************ 14300 * * 14301 * Front ends when parsing from a file * 14302 * * 14303 ************************************************************************/ 14304 14305 /** 14306 * xmlCreateURLParserCtxt: 14307 * @filename: the filename or URL 14308 * @options: a combination of xmlParserOption 14309 * 14310 * Create a parser context for a file or URL content. 
14311 * Automatic support for ZLIB/Compress compressed document is provided 14312 * by default if found at compile-time and for file accesses 14313 * 14314 * Returns the new parser context or NULL 14315 */ 14316 xmlParserCtxtPtr 14317 xmlCreateURLParserCtxt(const char *filename, int options) 14318 { 14319 xmlParserCtxtPtr ctxt; 14320 xmlParserInputPtr inputStream; 14321 char *directory = NULL; 14322 14323 ctxt = xmlNewParserCtxt(); 14324 if (ctxt == NULL) { 14325 xmlErrMemory(NULL, "cannot allocate parser context"); 14326 return(NULL); 14327 } 14328 14329 if (options) 14330 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 14331 ctxt->linenumbers = 1; 14332 14333 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 14334 if (inputStream == NULL) { 14335 xmlFreeParserCtxt(ctxt); 14336 return(NULL); 14337 } 14338 14339 inputPush(ctxt, inputStream); 14340 if ((ctxt->directory == NULL) && (directory == NULL)) 14341 directory = xmlParserGetDirectory(filename); 14342 if ((ctxt->directory == NULL) && (directory != NULL)) 14343 ctxt->directory = directory; 14344 14345 return(ctxt); 14346 } 14347 14348 /** 14349 * xmlCreateFileParserCtxt: 14350 * @filename: the filename 14351 * 14352 * Create a parser context for a file content. 14353 * Automatic support for ZLIB/Compress compressed document is provided 14354 * by default if found at compile-time. 14355 * 14356 * Returns the new parser context or NULL 14357 */ 14358 xmlParserCtxtPtr 14359 xmlCreateFileParserCtxt(const char *filename) 14360 { 14361 return(xmlCreateURLParserCtxt(filename, 0)); 14362 } 14363 14364 #ifdef LIBXML_SAX1_ENABLED 14365 /** 14366 * xmlSAXParseFileWithData: 14367 * @sax: the SAX handler block 14368 * @filename: the filename 14369 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14370 * documents 14371 * @data: the userdata 14372 * 14373 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14374 * compressed document is provided by default if found at compile-time. 14375 * It use the given SAX function block to handle the parsing callback. 14376 * If sax is NULL, fallback to the default DOM tree building routines. 14377 * 14378 * User data (void *) is stored within the parser context in the 14379 * context's _private member, so it is available nearly everywhere in libxml 14380 * 14381 * Returns the resulting document tree 14382 */ 14383 14384 xmlDocPtr 14385 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 14386 int recovery, void *data) { 14387 xmlDocPtr ret; 14388 xmlParserCtxtPtr ctxt; 14389 14390 xmlInitParser(); 14391 14392 ctxt = xmlCreateFileParserCtxt(filename); 14393 if (ctxt == NULL) { 14394 return(NULL); 14395 } 14396 if (sax != NULL) { 14397 if (ctxt->sax != NULL) 14398 xmlFree(ctxt->sax); 14399 ctxt->sax = sax; 14400 } 14401 xmlDetectSAX2(ctxt); 14402 if (data!=NULL) { 14403 ctxt->_private = data; 14404 } 14405 14406 if (ctxt->directory == NULL) 14407 ctxt->directory = xmlParserGetDirectory(filename); 14408 14409 ctxt->recovery = recovery; 14410 14411 xmlParseDocument(ctxt); 14412 14413 if ((ctxt->wellFormed) || recovery) { 14414 ret = ctxt->myDoc; 14415 if (ret != NULL) { 14416 if (ctxt->input->buf->compressed > 0) 14417 ret->compression = 9; 14418 else 14419 ret->compression = ctxt->input->buf->compressed; 14420 } 14421 } 14422 else { 14423 ret = NULL; 14424 xmlFreeDoc(ctxt->myDoc); 14425 ctxt->myDoc = NULL; 14426 } 14427 if (sax != NULL) 14428 ctxt->sax = NULL; 14429 xmlFreeParserCtxt(ctxt); 14430 14431 return(ret); 14432 } 14433 14434 /** 14435 * xmlSAXParseFile: 14436 * @sax: the SAX handler block 14437 * @filename: the filename 14438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14439 * documents 14440 * 14441 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 14442 * compressed document is provided by default if found at compile-time. 14443 * It use the given SAX function block to handle the parsing callback. 14444 * If sax is NULL, fallback to the default DOM tree building routines. 14445 * 14446 * Returns the resulting document tree 14447 */ 14448 14449 xmlDocPtr 14450 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 14451 int recovery) { 14452 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 14453 } 14454 14455 /** 14456 * xmlRecoverDoc: 14457 * @cur: a pointer to an array of xmlChar 14458 * 14459 * parse an XML in-memory document and build a tree. 14460 * In the case the document is not Well Formed, a attempt to build a 14461 * tree is tried anyway 14462 * 14463 * Returns the resulting document tree or NULL in case of failure 14464 */ 14465 14466 xmlDocPtr 14467 xmlRecoverDoc(const xmlChar *cur) { 14468 return(xmlSAXParseDoc(NULL, cur, 1)); 14469 } 14470 14471 /** 14472 * xmlParseFile: 14473 * @filename: the filename 14474 * 14475 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14476 * compressed document is provided by default if found at compile-time. 14477 * 14478 * Returns the resulting document tree if the file was wellformed, 14479 * NULL otherwise. 14480 */ 14481 14482 xmlDocPtr 14483 xmlParseFile(const char *filename) { 14484 return(xmlSAXParseFile(NULL, filename, 0)); 14485 } 14486 14487 /** 14488 * xmlRecoverFile: 14489 * @filename: the filename 14490 * 14491 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 14492 * compressed document is provided by default if found at compile-time. 
14493 * In the case the document is not Well Formed, it attempts to build 14494 * a tree anyway 14495 * 14496 * Returns the resulting document tree or NULL in case of failure 14497 */ 14498 14499 xmlDocPtr 14500 xmlRecoverFile(const char *filename) { 14501 return(xmlSAXParseFile(NULL, filename, 1)); 14502 } 14503 14504 14505 /** 14506 * xmlSetupParserForBuffer: 14507 * @ctxt: an XML parser context 14508 * @buffer: a xmlChar * buffer 14509 * @filename: a file name 14510 * 14511 * Setup the parser context to parse a new buffer; Clears any prior 14512 * contents from the parser context. The buffer parameter must not be 14513 * NULL, but the filename parameter can be 14514 */ 14515 void 14516 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 14517 const char* filename) 14518 { 14519 xmlParserInputPtr input; 14520 14521 if ((ctxt == NULL) || (buffer == NULL)) 14522 return; 14523 14524 input = xmlNewInputStream(ctxt); 14525 if (input == NULL) { 14526 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 14527 xmlClearParserCtxt(ctxt); 14528 return; 14529 } 14530 14531 xmlClearParserCtxt(ctxt); 14532 if (filename != NULL) 14533 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 14534 input->base = buffer; 14535 input->cur = buffer; 14536 input->end = &buffer[xmlStrlen(buffer)]; 14537 inputPush(ctxt, input); 14538 } 14539 14540 /** 14541 * xmlSAXUserParseFile: 14542 * @sax: a SAX handler 14543 * @user_data: The user data returned on SAX callbacks 14544 * @filename: a file name 14545 * 14546 * parse an XML file and call the given SAX handler routines. 
14547 * Automatic support for ZLIB/Compress compressed document is provided 14548 * 14549 * Returns 0 in case of success or a error number otherwise 14550 */ 14551 int 14552 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 14553 const char *filename) { 14554 int ret = 0; 14555 xmlParserCtxtPtr ctxt; 14556 14557 ctxt = xmlCreateFileParserCtxt(filename); 14558 if (ctxt == NULL) return -1; 14559 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14560 xmlFree(ctxt->sax); 14561 ctxt->sax = sax; 14562 xmlDetectSAX2(ctxt); 14563 14564 if (user_data != NULL) 14565 ctxt->userData = user_data; 14566 14567 xmlParseDocument(ctxt); 14568 14569 if (ctxt->wellFormed) 14570 ret = 0; 14571 else { 14572 if (ctxt->errNo != 0) 14573 ret = ctxt->errNo; 14574 else 14575 ret = -1; 14576 } 14577 if (sax != NULL) 14578 ctxt->sax = NULL; 14579 if (ctxt->myDoc != NULL) { 14580 xmlFreeDoc(ctxt->myDoc); 14581 ctxt->myDoc = NULL; 14582 } 14583 xmlFreeParserCtxt(ctxt); 14584 14585 return ret; 14586 } 14587 #endif /* LIBXML_SAX1_ENABLED */ 14588 14589 /************************************************************************ 14590 * * 14591 * Front ends when parsing from memory * 14592 * * 14593 ************************************************************************/ 14594 14595 /** 14596 * xmlCreateMemoryParserCtxt: 14597 * @buffer: a pointer to a char array 14598 * @size: the size of the array 14599 * 14600 * Create a parser context for an XML in-memory document. 
14601 * 14602 * Returns the new parser context or NULL 14603 */ 14604 xmlParserCtxtPtr 14605 xmlCreateMemoryParserCtxt(const char *buffer, int size) { 14606 xmlParserCtxtPtr ctxt; 14607 xmlParserInputPtr input; 14608 xmlParserInputBufferPtr buf; 14609 14610 if (buffer == NULL) 14611 return(NULL); 14612 if (size <= 0) 14613 return(NULL); 14614 14615 ctxt = xmlNewParserCtxt(); 14616 if (ctxt == NULL) 14617 return(NULL); 14618 14619 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 14620 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14621 if (buf == NULL) { 14622 xmlFreeParserCtxt(ctxt); 14623 return(NULL); 14624 } 14625 14626 input = xmlNewInputStream(ctxt); 14627 if (input == NULL) { 14628 xmlFreeParserInputBuffer(buf); 14629 xmlFreeParserCtxt(ctxt); 14630 return(NULL); 14631 } 14632 14633 input->filename = NULL; 14634 input->buf = buf; 14635 xmlBufResetInput(input->buf->buffer, input); 14636 14637 inputPush(ctxt, input); 14638 return(ctxt); 14639 } 14640 14641 #ifdef LIBXML_SAX1_ENABLED 14642 /** 14643 * xmlSAXParseMemoryWithData: 14644 * @sax: the SAX handler block 14645 * @buffer: an pointer to a char array 14646 * @size: the size of the array 14647 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14648 * documents 14649 * @data: the userdata 14650 * 14651 * parse an XML in-memory block and use the given SAX function block 14652 * to handle the parsing callback. If sax is NULL, fallback to the default 14653 * DOM tree building routines. 
14654 * 14655 * User data (void *) is stored within the parser context in the 14656 * context's _private member, so it is available nearly everywhere in libxml 14657 * 14658 * Returns the resulting document tree 14659 */ 14660 14661 xmlDocPtr 14662 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 14663 int size, int recovery, void *data) { 14664 xmlDocPtr ret; 14665 xmlParserCtxtPtr ctxt; 14666 14667 xmlInitParser(); 14668 14669 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14670 if (ctxt == NULL) return(NULL); 14671 if (sax != NULL) { 14672 if (ctxt->sax != NULL) 14673 xmlFree(ctxt->sax); 14674 ctxt->sax = sax; 14675 } 14676 xmlDetectSAX2(ctxt); 14677 if (data!=NULL) { 14678 ctxt->_private=data; 14679 } 14680 14681 ctxt->recovery = recovery; 14682 14683 xmlParseDocument(ctxt); 14684 14685 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14686 else { 14687 ret = NULL; 14688 xmlFreeDoc(ctxt->myDoc); 14689 ctxt->myDoc = NULL; 14690 } 14691 if (sax != NULL) 14692 ctxt->sax = NULL; 14693 xmlFreeParserCtxt(ctxt); 14694 14695 return(ret); 14696 } 14697 14698 /** 14699 * xmlSAXParseMemory: 14700 * @sax: the SAX handler block 14701 * @buffer: an pointer to a char array 14702 * @size: the size of the array 14703 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 14704 * documents 14705 * 14706 * parse an XML in-memory block and use the given SAX function block 14707 * to handle the parsing callback. If sax is NULL, fallback to the default 14708 * DOM tree building routines. 14709 * 14710 * Returns the resulting document tree 14711 */ 14712 xmlDocPtr 14713 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 14714 int size, int recovery) { 14715 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 14716 } 14717 14718 /** 14719 * xmlParseMemory: 14720 * @buffer: an pointer to a char array 14721 * @size: the size of the array 14722 * 14723 * parse an XML in-memory block and build a tree. 
14724 * 14725 * Returns the resulting document tree 14726 */ 14727 14728 xmlDocPtr xmlParseMemory(const char *buffer, int size) { 14729 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 14730 } 14731 14732 /** 14733 * xmlRecoverMemory: 14734 * @buffer: an pointer to a char array 14735 * @size: the size of the array 14736 * 14737 * parse an XML in-memory block and build a tree. 14738 * In the case the document is not Well Formed, an attempt to 14739 * build a tree is tried anyway 14740 * 14741 * Returns the resulting document tree or NULL in case of error 14742 */ 14743 14744 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 14745 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 14746 } 14747 14748 /** 14749 * xmlSAXUserParseMemory: 14750 * @sax: a SAX handler 14751 * @user_data: The user data returned on SAX callbacks 14752 * @buffer: an in-memory XML document input 14753 * @size: the length of the XML document in bytes 14754 * 14755 * A better SAX parsing routine. 14756 * parse an XML in-memory buffer and call the given SAX handler routines. 
14757 * 14758 * Returns 0 in case of success or a error number otherwise 14759 */ 14760 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 14761 const char *buffer, int size) { 14762 int ret = 0; 14763 xmlParserCtxtPtr ctxt; 14764 14765 xmlInitParser(); 14766 14767 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14768 if (ctxt == NULL) return -1; 14769 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 14770 xmlFree(ctxt->sax); 14771 ctxt->sax = sax; 14772 xmlDetectSAX2(ctxt); 14773 14774 if (user_data != NULL) 14775 ctxt->userData = user_data; 14776 14777 xmlParseDocument(ctxt); 14778 14779 if (ctxt->wellFormed) 14780 ret = 0; 14781 else { 14782 if (ctxt->errNo != 0) 14783 ret = ctxt->errNo; 14784 else 14785 ret = -1; 14786 } 14787 if (sax != NULL) 14788 ctxt->sax = NULL; 14789 if (ctxt->myDoc != NULL) { 14790 xmlFreeDoc(ctxt->myDoc); 14791 ctxt->myDoc = NULL; 14792 } 14793 xmlFreeParserCtxt(ctxt); 14794 14795 return ret; 14796 } 14797 #endif /* LIBXML_SAX1_ENABLED */ 14798 14799 /** 14800 * xmlCreateDocParserCtxt: 14801 * @cur: a pointer to an array of xmlChar 14802 * 14803 * Creates a parser context for an XML in-memory document. 14804 * 14805 * Returns the new parser context or NULL 14806 */ 14807 xmlParserCtxtPtr 14808 xmlCreateDocParserCtxt(const xmlChar *cur) { 14809 int len; 14810 14811 if (cur == NULL) 14812 return(NULL); 14813 len = xmlStrlen(cur); 14814 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 14815 } 14816 14817 #ifdef LIBXML_SAX1_ENABLED 14818 /** 14819 * xmlSAXParseDoc: 14820 * @sax: the SAX handler block 14821 * @cur: a pointer to an array of xmlChar 14822 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 14823 * documents 14824 * 14825 * parse an XML in-memory document and build a tree. 14826 * It use the given SAX function block to handle the parsing callback. 14827 * If sax is NULL, fallback to the default DOM tree building routines. 
14828 * 14829 * Returns the resulting document tree 14830 */ 14831 14832 xmlDocPtr 14833 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 14834 xmlDocPtr ret; 14835 xmlParserCtxtPtr ctxt; 14836 xmlSAXHandlerPtr oldsax = NULL; 14837 14838 if (cur == NULL) return(NULL); 14839 14840 14841 ctxt = xmlCreateDocParserCtxt(cur); 14842 if (ctxt == NULL) return(NULL); 14843 if (sax != NULL) { 14844 oldsax = ctxt->sax; 14845 ctxt->sax = sax; 14846 ctxt->userData = NULL; 14847 } 14848 xmlDetectSAX2(ctxt); 14849 14850 xmlParseDocument(ctxt); 14851 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 14852 else { 14853 ret = NULL; 14854 xmlFreeDoc(ctxt->myDoc); 14855 ctxt->myDoc = NULL; 14856 } 14857 if (sax != NULL) 14858 ctxt->sax = oldsax; 14859 xmlFreeParserCtxt(ctxt); 14860 14861 return(ret); 14862 } 14863 14864 /** 14865 * xmlParseDoc: 14866 * @cur: a pointer to an array of xmlChar 14867 * 14868 * parse an XML in-memory document and build a tree. 14869 * 14870 * Returns the resulting document tree 14871 */ 14872 14873 xmlDocPtr 14874 xmlParseDoc(const xmlChar *cur) { 14875 return(xmlSAXParseDoc(NULL, cur, 0)); 14876 } 14877 #endif /* LIBXML_SAX1_ENABLED */ 14878 14879 #ifdef LIBXML_LEGACY_ENABLED 14880 /************************************************************************ 14881 * * 14882 * Specific function to keep track of entities references * 14883 * and used by the XSLT debugger * 14884 * * 14885 ************************************************************************/ 14886 14887 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 14888 14889 /** 14890 * xmlAddEntityReference: 14891 * @ent : A valid entity 14892 * @firstNode : A valid first node for children of entity 14893 * @lastNode : A valid last node of children entity 14894 * 14895 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 14896 */ 14897 static void 14898 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 14899 xmlNodePtr lastNode) 
14900 { 14901 if (xmlEntityRefFunc != NULL) { 14902 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 14903 } 14904 } 14905 14906 14907 /** 14908 * xmlSetEntityReferenceFunc: 14909 * @func: A valid function 14910 * 14911 * Set the function to call call back when a xml reference has been made 14912 */ 14913 void 14914 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 14915 { 14916 xmlEntityRefFunc = func; 14917 } 14918 #endif /* LIBXML_LEGACY_ENABLED */ 14919 14920 /************************************************************************ 14921 * * 14922 * Miscellaneous * 14923 * * 14924 ************************************************************************/ 14925 14926 #ifdef LIBXML_XPATH_ENABLED 14927 #include <libxml/xpath.h> 14928 #endif 14929 14930 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 14931 static int xmlParserInitialized = 0; 14932 14933 /** 14934 * xmlInitParser: 14935 * 14936 * Initialization function for the XML parser. 14937 * This is not reentrant. Call once before processing in case of 14938 * use in multithreaded programs. 
14939 */ 14940 14941 void 14942 xmlInitParser(void) { 14943 if (xmlParserInitialized != 0) 14944 return; 14945 14946 #ifdef LIBXML_THREAD_ENABLED 14947 __xmlGlobalInitMutexLock(); 14948 if (xmlParserInitialized == 0) { 14949 #endif 14950 xmlInitThreads(); 14951 xmlInitGlobals(); 14952 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 14953 (xmlGenericError == NULL)) 14954 initGenericErrorDefaultFunc(NULL); 14955 xmlInitMemory(); 14956 xmlInitializeDict(); 14957 xmlInitCharEncodingHandlers(); 14958 xmlDefaultSAXHandlerInit(); 14959 xmlRegisterDefaultInputCallbacks(); 14960 #ifdef LIBXML_OUTPUT_ENABLED 14961 xmlRegisterDefaultOutputCallbacks(); 14962 #endif /* LIBXML_OUTPUT_ENABLED */ 14963 #ifdef LIBXML_HTML_ENABLED 14964 htmlInitAutoClose(); 14965 htmlDefaultSAXHandlerInit(); 14966 #endif 14967 #ifdef LIBXML_XPATH_ENABLED 14968 xmlXPathInit(); 14969 #endif 14970 xmlParserInitialized = 1; 14971 #ifdef LIBXML_THREAD_ENABLED 14972 } 14973 __xmlGlobalInitMutexUnlock(); 14974 #endif 14975 } 14976 14977 /** 14978 * xmlCleanupParser: 14979 * 14980 * This function name is somewhat misleading. It does not clean up 14981 * parser state, it cleans up memory allocated by the library itself. 14982 * It is a cleanup function for the XML library. It tries to reclaim all 14983 * related global memory allocated for the library processing. 14984 * It doesn't deallocate any document related memory. One should 14985 * call xmlCleanupParser() only when the process has finished using 14986 * the library and all XML/HTML documents built with it. 14987 * See also xmlInitParser() which has the opposite function of preparing 14988 * the library for operations. 14989 * 14990 * WARNING: if your application is multithreaded or has plugin support 14991 * calling this may crash the application if another thread or 14992 * a plugin is still using libxml2. 
It's sometimes very hard to 14993 * guess if libxml2 is in use in the application, some libraries 14994 * or plugins may use it without notice. In case of doubt abstain 14995 * from calling this function or do it just before calling exit() 14996 * to avoid leak reports from valgrind ! 14997 */ 14998 14999 void 15000 xmlCleanupParser(void) { 15001 if (!xmlParserInitialized) 15002 return; 15003 15004 xmlCleanupCharEncodingHandlers(); 15005 #ifdef LIBXML_CATALOG_ENABLED 15006 xmlCatalogCleanup(); 15007 #endif 15008 xmlDictCleanup(); 15009 xmlCleanupInputCallbacks(); 15010 #ifdef LIBXML_OUTPUT_ENABLED 15011 xmlCleanupOutputCallbacks(); 15012 #endif 15013 #ifdef LIBXML_SCHEMAS_ENABLED 15014 xmlSchemaCleanupTypes(); 15015 xmlRelaxNGCleanupTypes(); 15016 #endif 15017 xmlResetLastError(); 15018 xmlCleanupGlobals(); 15019 xmlCleanupThreads(); /* must be last if called not from the main thread */ 15020 xmlCleanupMemory(); 15021 xmlParserInitialized = 0; 15022 } 15023 15024 /************************************************************************ 15025 * * 15026 * New set (2.6.0) of simpler and more flexible APIs * 15027 * * 15028 ************************************************************************/ 15029 15030 /** 15031 * DICT_FREE: 15032 * @str: a string 15033 * 15034 * Free a string if it is not owned by the "dict" dictionary in the 15035 * current scope 15036 */ 15037 #define DICT_FREE(str) \ 15038 if ((str) && ((!dict) || \ 15039 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 15040 xmlFree((char *)(str)); 15041 15042 /** 15043 * xmlCtxtReset: 15044 * @ctxt: an XML parser context 15045 * 15046 * Reset a parser context 15047 */ 15048 void 15049 xmlCtxtReset(xmlParserCtxtPtr ctxt) 15050 { 15051 xmlParserInputPtr input; 15052 xmlDictPtr dict; 15053 15054 if (ctxt == NULL) 15055 return; 15056 15057 dict = ctxt->dict; 15058 15059 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 15060 xmlFreeInputStream(input); 15061 } 15062 ctxt->inputNr = 0; 15063 
ctxt->input = NULL; 15064 15065 ctxt->spaceNr = 0; 15066 if (ctxt->spaceTab != NULL) { 15067 ctxt->spaceTab[0] = -1; 15068 ctxt->space = &ctxt->spaceTab[0]; 15069 } else { 15070 ctxt->space = NULL; 15071 } 15072 15073 15074 ctxt->nodeNr = 0; 15075 ctxt->node = NULL; 15076 15077 ctxt->nameNr = 0; 15078 ctxt->name = NULL; 15079 15080 DICT_FREE(ctxt->version); 15081 ctxt->version = NULL; 15082 DICT_FREE(ctxt->encoding); 15083 ctxt->encoding = NULL; 15084 DICT_FREE(ctxt->directory); 15085 ctxt->directory = NULL; 15086 DICT_FREE(ctxt->extSubURI); 15087 ctxt->extSubURI = NULL; 15088 DICT_FREE(ctxt->extSubSystem); 15089 ctxt->extSubSystem = NULL; 15090 if (ctxt->myDoc != NULL) 15091 xmlFreeDoc(ctxt->myDoc); 15092 ctxt->myDoc = NULL; 15093 15094 ctxt->standalone = -1; 15095 ctxt->hasExternalSubset = 0; 15096 ctxt->hasPErefs = 0; 15097 ctxt->html = 0; 15098 ctxt->external = 0; 15099 ctxt->instate = XML_PARSER_START; 15100 ctxt->token = 0; 15101 15102 ctxt->wellFormed = 1; 15103 ctxt->nsWellFormed = 1; 15104 ctxt->disableSAX = 0; 15105 ctxt->valid = 1; 15106 #if 0 15107 ctxt->vctxt.userData = ctxt; 15108 ctxt->vctxt.error = xmlParserValidityError; 15109 ctxt->vctxt.warning = xmlParserValidityWarning; 15110 #endif 15111 ctxt->record_info = 0; 15112 ctxt->nbChars = 0; 15113 ctxt->checkIndex = 0; 15114 ctxt->inSubset = 0; 15115 ctxt->errNo = XML_ERR_OK; 15116 ctxt->depth = 0; 15117 ctxt->charset = XML_CHAR_ENCODING_UTF8; 15118 ctxt->catalogs = NULL; 15119 ctxt->nbentities = 0; 15120 ctxt->sizeentities = 0; 15121 ctxt->sizeentcopy = 0; 15122 xmlInitNodeInfoSeq(&ctxt->node_seq); 15123 15124 if (ctxt->attsDefault != NULL) { 15125 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 15126 ctxt->attsDefault = NULL; 15127 } 15128 if (ctxt->attsSpecial != NULL) { 15129 xmlHashFree(ctxt->attsSpecial, NULL); 15130 ctxt->attsSpecial = NULL; 15131 } 15132 15133 #ifdef LIBXML_CATALOG_ENABLED 15134 if (ctxt->catalogs != NULL) 15135 xmlCatalogFreeLocal(ctxt->catalogs); 15136 #endif 
15137 if (ctxt->lastError.code != XML_ERR_OK) 15138 xmlResetError(&ctxt->lastError); 15139 } 15140 15141 /** 15142 * xmlCtxtResetPush: 15143 * @ctxt: an XML parser context 15144 * @chunk: a pointer to an array of chars 15145 * @size: number of chars in the array 15146 * @filename: an optional file name or URI 15147 * @encoding: the document encoding, or NULL 15148 * 15149 * Reset a push parser context 15150 * 15151 * Returns 0 in case of success and 1 in case of error 15152 */ 15153 int 15154 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 15155 int size, const char *filename, const char *encoding) 15156 { 15157 xmlParserInputPtr inputStream; 15158 xmlParserInputBufferPtr buf; 15159 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 15160 15161 if (ctxt == NULL) 15162 return(1); 15163 15164 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 15165 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 15166 15167 buf = xmlAllocParserInputBuffer(enc); 15168 if (buf == NULL) 15169 return(1); 15170 15171 if (ctxt == NULL) { 15172 xmlFreeParserInputBuffer(buf); 15173 return(1); 15174 } 15175 15176 xmlCtxtReset(ctxt); 15177 15178 if (ctxt->pushTab == NULL) { 15179 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 15180 sizeof(xmlChar *)); 15181 if (ctxt->pushTab == NULL) { 15182 xmlErrMemory(ctxt, NULL); 15183 xmlFreeParserInputBuffer(buf); 15184 return(1); 15185 } 15186 } 15187 15188 if (filename == NULL) { 15189 ctxt->directory = NULL; 15190 } else { 15191 ctxt->directory = xmlParserGetDirectory(filename); 15192 } 15193 15194 inputStream = xmlNewInputStream(ctxt); 15195 if (inputStream == NULL) { 15196 xmlFreeParserInputBuffer(buf); 15197 return(1); 15198 } 15199 15200 if (filename == NULL) 15201 inputStream->filename = NULL; 15202 else 15203 inputStream->filename = (char *) 15204 xmlCanonicPath((const xmlChar *) filename); 15205 inputStream->buf = buf; 15206 xmlBufResetInput(buf->buffer, inputStream); 15207 15208 inputPush(ctxt, inputStream); 
15209 15210 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 15211 (ctxt->input->buf != NULL)) { 15212 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); 15213 size_t cur = ctxt->input->cur - ctxt->input->base; 15214 15215 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 15216 15217 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); 15218 #ifdef DEBUG_PUSH 15219 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 15220 #endif 15221 } 15222 15223 if (encoding != NULL) { 15224 xmlCharEncodingHandlerPtr hdlr; 15225 15226 if (ctxt->encoding != NULL) 15227 xmlFree((xmlChar *) ctxt->encoding); 15228 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15229 15230 hdlr = xmlFindCharEncodingHandler(encoding); 15231 if (hdlr != NULL) { 15232 xmlSwitchToEncoding(ctxt, hdlr); 15233 } else { 15234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 15235 "Unsupported encoding %s\n", BAD_CAST encoding); 15236 } 15237 } else if (enc != XML_CHAR_ENCODING_NONE) { 15238 xmlSwitchEncoding(ctxt, enc); 15239 } 15240 15241 return(0); 15242 } 15243 15244 15245 /** 15246 * xmlCtxtUseOptionsInternal: 15247 * @ctxt: an XML parser context 15248 * @options: a combination of xmlParserOption 15249 * @encoding: the user provided encoding to use 15250 * 15251 * Applies the options to the parser context 15252 * 15253 * Returns 0 in case of success, the set of unknown or unimplemented options 15254 * in case of error. 
15255 */ 15256 static int 15257 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 15258 { 15259 if (ctxt == NULL) 15260 return(-1); 15261 if (encoding != NULL) { 15262 if (ctxt->encoding != NULL) 15263 xmlFree((xmlChar *) ctxt->encoding); 15264 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 15265 } 15266 if (options & XML_PARSE_RECOVER) { 15267 ctxt->recovery = 1; 15268 options -= XML_PARSE_RECOVER; 15269 ctxt->options |= XML_PARSE_RECOVER; 15270 } else 15271 ctxt->recovery = 0; 15272 if (options & XML_PARSE_DTDLOAD) { 15273 ctxt->loadsubset = XML_DETECT_IDS; 15274 options -= XML_PARSE_DTDLOAD; 15275 ctxt->options |= XML_PARSE_DTDLOAD; 15276 } else 15277 ctxt->loadsubset = 0; 15278 if (options & XML_PARSE_DTDATTR) { 15279 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 15280 options -= XML_PARSE_DTDATTR; 15281 ctxt->options |= XML_PARSE_DTDATTR; 15282 } 15283 if (options & XML_PARSE_NOENT) { 15284 ctxt->replaceEntities = 1; 15285 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 15286 options -= XML_PARSE_NOENT; 15287 ctxt->options |= XML_PARSE_NOENT; 15288 } else 15289 ctxt->replaceEntities = 0; 15290 if (options & XML_PARSE_PEDANTIC) { 15291 ctxt->pedantic = 1; 15292 options -= XML_PARSE_PEDANTIC; 15293 ctxt->options |= XML_PARSE_PEDANTIC; 15294 } else 15295 ctxt->pedantic = 0; 15296 if (options & XML_PARSE_NOBLANKS) { 15297 ctxt->keepBlanks = 0; 15298 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 15299 options -= XML_PARSE_NOBLANKS; 15300 ctxt->options |= XML_PARSE_NOBLANKS; 15301 } else 15302 ctxt->keepBlanks = 1; 15303 if (options & XML_PARSE_DTDVALID) { 15304 ctxt->validate = 1; 15305 if (options & XML_PARSE_NOWARNING) 15306 ctxt->vctxt.warning = NULL; 15307 if (options & XML_PARSE_NOERROR) 15308 ctxt->vctxt.error = NULL; 15309 options -= XML_PARSE_DTDVALID; 15310 ctxt->options |= XML_PARSE_DTDVALID; 15311 } else 15312 ctxt->validate = 0; 15313 if (options & XML_PARSE_NOWARNING) { 15314 ctxt->sax->warning = NULL; 
15315 options -= XML_PARSE_NOWARNING; 15316 } 15317 if (options & XML_PARSE_NOERROR) { 15318 ctxt->sax->error = NULL; 15319 ctxt->sax->fatalError = NULL; 15320 options -= XML_PARSE_NOERROR; 15321 } 15322 #ifdef LIBXML_SAX1_ENABLED 15323 if (options & XML_PARSE_SAX1) { 15324 ctxt->sax->startElement = xmlSAX2StartElement; 15325 ctxt->sax->endElement = xmlSAX2EndElement; 15326 ctxt->sax->startElementNs = NULL; 15327 ctxt->sax->endElementNs = NULL; 15328 ctxt->sax->initialized = 1; 15329 options -= XML_PARSE_SAX1; 15330 ctxt->options |= XML_PARSE_SAX1; 15331 } 15332 #endif /* LIBXML_SAX1_ENABLED */ 15333 if (options & XML_PARSE_NODICT) { 15334 ctxt->dictNames = 0; 15335 options -= XML_PARSE_NODICT; 15336 ctxt->options |= XML_PARSE_NODICT; 15337 } else { 15338 ctxt->dictNames = 1; 15339 } 15340 if (options & XML_PARSE_NOCDATA) { 15341 ctxt->sax->cdataBlock = NULL; 15342 options -= XML_PARSE_NOCDATA; 15343 ctxt->options |= XML_PARSE_NOCDATA; 15344 } 15345 if (options & XML_PARSE_NSCLEAN) { 15346 ctxt->options |= XML_PARSE_NSCLEAN; 15347 options -= XML_PARSE_NSCLEAN; 15348 } 15349 if (options & XML_PARSE_NONET) { 15350 ctxt->options |= XML_PARSE_NONET; 15351 options -= XML_PARSE_NONET; 15352 } 15353 if (options & XML_PARSE_COMPACT) { 15354 ctxt->options |= XML_PARSE_COMPACT; 15355 options -= XML_PARSE_COMPACT; 15356 } 15357 if (options & XML_PARSE_OLD10) { 15358 ctxt->options |= XML_PARSE_OLD10; 15359 options -= XML_PARSE_OLD10; 15360 } 15361 if (options & XML_PARSE_NOBASEFIX) { 15362 ctxt->options |= XML_PARSE_NOBASEFIX; 15363 options -= XML_PARSE_NOBASEFIX; 15364 } 15365 if (options & XML_PARSE_HUGE) { 15366 ctxt->options |= XML_PARSE_HUGE; 15367 options -= XML_PARSE_HUGE; 15368 if (ctxt->dict != NULL) 15369 xmlDictSetLimit(ctxt->dict, 0); 15370 } 15371 if (options & XML_PARSE_OLDSAX) { 15372 ctxt->options |= XML_PARSE_OLDSAX; 15373 options -= XML_PARSE_OLDSAX; 15374 } 15375 if (options & XML_PARSE_IGNORE_ENC) { 15376 ctxt->options |= XML_PARSE_IGNORE_ENC; 15377 options 
-= XML_PARSE_IGNORE_ENC; 15378 } 15379 if (options & XML_PARSE_BIG_LINES) { 15380 ctxt->options |= XML_PARSE_BIG_LINES; 15381 options -= XML_PARSE_BIG_LINES; 15382 } 15383 ctxt->linenumbers = 1; 15384 return (options); 15385 } 15386 15387 /** 15388 * xmlCtxtUseOptions: 15389 * @ctxt: an XML parser context 15390 * @options: a combination of xmlParserOption 15391 * 15392 * Applies the options to the parser context 15393 * 15394 * Returns 0 in case of success, the set of unknown or unimplemented options 15395 * in case of error. 15396 */ 15397 int 15398 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 15399 { 15400 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 15401 } 15402 15403 /** 15404 * xmlDoRead: 15405 * @ctxt: an XML parser context 15406 * @URL: the base URL to use for the document 15407 * @encoding: the document encoding, or NULL 15408 * @options: a combination of xmlParserOption 15409 * @reuse: keep the context for reuse 15410 * 15411 * Common front-end for the xmlRead functions 15412 * 15413 * Returns the resulting document tree or NULL 15414 */ 15415 static xmlDocPtr 15416 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 15417 int options, int reuse) 15418 { 15419 xmlDocPtr ret; 15420 15421 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 15422 if (encoding != NULL) { 15423 xmlCharEncodingHandlerPtr hdlr; 15424 15425 hdlr = xmlFindCharEncodingHandler(encoding); 15426 if (hdlr != NULL) 15427 xmlSwitchToEncoding(ctxt, hdlr); 15428 } 15429 if ((URL != NULL) && (ctxt->input != NULL) && 15430 (ctxt->input->filename == NULL)) 15431 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 15432 xmlParseDocument(ctxt); 15433 if ((ctxt->wellFormed) || ctxt->recovery) 15434 ret = ctxt->myDoc; 15435 else { 15436 ret = NULL; 15437 if (ctxt->myDoc != NULL) { 15438 xmlFreeDoc(ctxt->myDoc); 15439 } 15440 } 15441 ctxt->myDoc = NULL; 15442 if (!reuse) { 15443 xmlFreeParserCtxt(ctxt); 15444 } 15445 15446 return (ret); 15447 
} 15448 15449 /** 15450 * xmlReadDoc: 15451 * @cur: a pointer to a zero terminated string 15452 * @URL: the base URL to use for the document 15453 * @encoding: the document encoding, or NULL 15454 * @options: a combination of xmlParserOption 15455 * 15456 * parse an XML in-memory document and build a tree. 15457 * 15458 * Returns the resulting document tree 15459 */ 15460 xmlDocPtr 15461 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 15462 { 15463 xmlParserCtxtPtr ctxt; 15464 15465 if (cur == NULL) 15466 return (NULL); 15467 xmlInitParser(); 15468 15469 ctxt = xmlCreateDocParserCtxt(cur); 15470 if (ctxt == NULL) 15471 return (NULL); 15472 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15473 } 15474 15475 /** 15476 * xmlReadFile: 15477 * @filename: a file or URL 15478 * @encoding: the document encoding, or NULL 15479 * @options: a combination of xmlParserOption 15480 * 15481 * parse an XML file from the filesystem or the network. 15482 * 15483 * Returns the resulting document tree 15484 */ 15485 xmlDocPtr 15486 xmlReadFile(const char *filename, const char *encoding, int options) 15487 { 15488 xmlParserCtxtPtr ctxt; 15489 15490 xmlInitParser(); 15491 ctxt = xmlCreateURLParserCtxt(filename, options); 15492 if (ctxt == NULL) 15493 return (NULL); 15494 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 15495 } 15496 15497 /** 15498 * xmlReadMemory: 15499 * @buffer: a pointer to a char array 15500 * @size: the size of the array 15501 * @URL: the base URL to use for the document 15502 * @encoding: the document encoding, or NULL 15503 * @options: a combination of xmlParserOption 15504 * 15505 * parse an XML in-memory document and build a tree. 
15506 * 15507 * Returns the resulting document tree 15508 */ 15509 xmlDocPtr 15510 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 15511 { 15512 xmlParserCtxtPtr ctxt; 15513 15514 xmlInitParser(); 15515 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 15516 if (ctxt == NULL) 15517 return (NULL); 15518 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15519 } 15520 15521 /** 15522 * xmlReadFd: 15523 * @fd: an open file descriptor 15524 * @URL: the base URL to use for the document 15525 * @encoding: the document encoding, or NULL 15526 * @options: a combination of xmlParserOption 15527 * 15528 * parse an XML from a file descriptor and build a tree. 15529 * NOTE that the file descriptor will not be closed when the 15530 * reader is closed or reset. 15531 * 15532 * Returns the resulting document tree 15533 */ 15534 xmlDocPtr 15535 xmlReadFd(int fd, const char *URL, const char *encoding, int options) 15536 { 15537 xmlParserCtxtPtr ctxt; 15538 xmlParserInputBufferPtr input; 15539 xmlParserInputPtr stream; 15540 15541 if (fd < 0) 15542 return (NULL); 15543 xmlInitParser(); 15544 15545 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15546 if (input == NULL) 15547 return (NULL); 15548 input->closecallback = NULL; 15549 ctxt = xmlNewParserCtxt(); 15550 if (ctxt == NULL) { 15551 xmlFreeParserInputBuffer(input); 15552 return (NULL); 15553 } 15554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15555 if (stream == NULL) { 15556 xmlFreeParserInputBuffer(input); 15557 xmlFreeParserCtxt(ctxt); 15558 return (NULL); 15559 } 15560 inputPush(ctxt, stream); 15561 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15562 } 15563 15564 /** 15565 * xmlReadIO: 15566 * @ioread: an I/O read function 15567 * @ioclose: an I/O close function 15568 * @ioctx: an I/O handler 15569 * @URL: the base URL to use for the document 15570 * @encoding: the document encoding, or NULL 15571 * @options: a combination of 
xmlParserOption 15572 * 15573 * parse an XML document from I/O functions and source and build a tree. 15574 * 15575 * Returns the resulting document tree 15576 */ 15577 xmlDocPtr 15578 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 15579 void *ioctx, const char *URL, const char *encoding, int options) 15580 { 15581 xmlParserCtxtPtr ctxt; 15582 xmlParserInputBufferPtr input; 15583 xmlParserInputPtr stream; 15584 15585 if (ioread == NULL) 15586 return (NULL); 15587 xmlInitParser(); 15588 15589 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15590 XML_CHAR_ENCODING_NONE); 15591 if (input == NULL) { 15592 if (ioclose != NULL) 15593 ioclose(ioctx); 15594 return (NULL); 15595 } 15596 ctxt = xmlNewParserCtxt(); 15597 if (ctxt == NULL) { 15598 xmlFreeParserInputBuffer(input); 15599 return (NULL); 15600 } 15601 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15602 if (stream == NULL) { 15603 xmlFreeParserInputBuffer(input); 15604 xmlFreeParserCtxt(ctxt); 15605 return (NULL); 15606 } 15607 inputPush(ctxt, stream); 15608 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 15609 } 15610 15611 /** 15612 * xmlCtxtReadDoc: 15613 * @ctxt: an XML parser context 15614 * @cur: a pointer to a zero terminated string 15615 * @URL: the base URL to use for the document 15616 * @encoding: the document encoding, or NULL 15617 * @options: a combination of xmlParserOption 15618 * 15619 * parse an XML in-memory document and build a tree. 
15620 * This reuses the existing @ctxt parser context 15621 * 15622 * Returns the resulting document tree 15623 */ 15624 xmlDocPtr 15625 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 15626 const char *URL, const char *encoding, int options) 15627 { 15628 xmlParserInputPtr stream; 15629 15630 if (cur == NULL) 15631 return (NULL); 15632 if (ctxt == NULL) 15633 return (NULL); 15634 xmlInitParser(); 15635 15636 xmlCtxtReset(ctxt); 15637 15638 stream = xmlNewStringInputStream(ctxt, cur); 15639 if (stream == NULL) { 15640 return (NULL); 15641 } 15642 inputPush(ctxt, stream); 15643 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15644 } 15645 15646 /** 15647 * xmlCtxtReadFile: 15648 * @ctxt: an XML parser context 15649 * @filename: a file or URL 15650 * @encoding: the document encoding, or NULL 15651 * @options: a combination of xmlParserOption 15652 * 15653 * parse an XML file from the filesystem or the network. 15654 * This reuses the existing @ctxt parser context 15655 * 15656 * Returns the resulting document tree 15657 */ 15658 xmlDocPtr 15659 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 15660 const char *encoding, int options) 15661 { 15662 xmlParserInputPtr stream; 15663 15664 if (filename == NULL) 15665 return (NULL); 15666 if (ctxt == NULL) 15667 return (NULL); 15668 xmlInitParser(); 15669 15670 xmlCtxtReset(ctxt); 15671 15672 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 15673 if (stream == NULL) { 15674 return (NULL); 15675 } 15676 inputPush(ctxt, stream); 15677 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 15678 } 15679 15680 /** 15681 * xmlCtxtReadMemory: 15682 * @ctxt: an XML parser context 15683 * @buffer: a pointer to a char array 15684 * @size: the size of the array 15685 * @URL: the base URL to use for the document 15686 * @encoding: the document encoding, or NULL 15687 * @options: a combination of xmlParserOption 15688 * 15689 * parse an XML in-memory document and build a tree. 
15690 * This reuses the existing @ctxt parser context 15691 * 15692 * Returns the resulting document tree 15693 */ 15694 xmlDocPtr 15695 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 15696 const char *URL, const char *encoding, int options) 15697 { 15698 xmlParserInputBufferPtr input; 15699 xmlParserInputPtr stream; 15700 15701 if (ctxt == NULL) 15702 return (NULL); 15703 if (buffer == NULL) 15704 return (NULL); 15705 xmlInitParser(); 15706 15707 xmlCtxtReset(ctxt); 15708 15709 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 15710 if (input == NULL) { 15711 return(NULL); 15712 } 15713 15714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15715 if (stream == NULL) { 15716 xmlFreeParserInputBuffer(input); 15717 return(NULL); 15718 } 15719 15720 inputPush(ctxt, stream); 15721 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15722 } 15723 15724 /** 15725 * xmlCtxtReadFd: 15726 * @ctxt: an XML parser context 15727 * @fd: an open file descriptor 15728 * @URL: the base URL to use for the document 15729 * @encoding: the document encoding, or NULL 15730 * @options: a combination of xmlParserOption 15731 * 15732 * parse an XML from a file descriptor and build a tree. 15733 * This reuses the existing @ctxt parser context 15734 * NOTE that the file descriptor will not be closed when the 15735 * reader is closed or reset. 
15736 * 15737 * Returns the resulting document tree 15738 */ 15739 xmlDocPtr 15740 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 15741 const char *URL, const char *encoding, int options) 15742 { 15743 xmlParserInputBufferPtr input; 15744 xmlParserInputPtr stream; 15745 15746 if (fd < 0) 15747 return (NULL); 15748 if (ctxt == NULL) 15749 return (NULL); 15750 xmlInitParser(); 15751 15752 xmlCtxtReset(ctxt); 15753 15754 15755 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 15756 if (input == NULL) 15757 return (NULL); 15758 input->closecallback = NULL; 15759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15760 if (stream == NULL) { 15761 xmlFreeParserInputBuffer(input); 15762 return (NULL); 15763 } 15764 inputPush(ctxt, stream); 15765 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15766 } 15767 15768 /** 15769 * xmlCtxtReadIO: 15770 * @ctxt: an XML parser context 15771 * @ioread: an I/O read function 15772 * @ioclose: an I/O close function 15773 * @ioctx: an I/O handler 15774 * @URL: the base URL to use for the document 15775 * @encoding: the document encoding, or NULL 15776 * @options: a combination of xmlParserOption 15777 * 15778 * parse an XML document from I/O functions and source and build a tree. 
15779 * This reuses the existing @ctxt parser context 15780 * 15781 * Returns the resulting document tree 15782 */ 15783 xmlDocPtr 15784 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 15785 xmlInputCloseCallback ioclose, void *ioctx, 15786 const char *URL, 15787 const char *encoding, int options) 15788 { 15789 xmlParserInputBufferPtr input; 15790 xmlParserInputPtr stream; 15791 15792 if (ioread == NULL) 15793 return (NULL); 15794 if (ctxt == NULL) 15795 return (NULL); 15796 xmlInitParser(); 15797 15798 xmlCtxtReset(ctxt); 15799 15800 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 15801 XML_CHAR_ENCODING_NONE); 15802 if (input == NULL) { 15803 if (ioclose != NULL) 15804 ioclose(ioctx); 15805 return (NULL); 15806 } 15807 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 15808 if (stream == NULL) { 15809 xmlFreeParserInputBuffer(input); 15810 return (NULL); 15811 } 15812 inputPush(ctxt, stream); 15813 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 15814 } 15815 15816 #define bottom_parser 15817 #include "elfgcchack.h" 15818